diff options
author | Arthur Centeno | 2021-06-15 15:33:59 +0000 |
---|---|---|
committer | Arthur Centeno | 2021-06-15 15:33:59 +0000 |
commit | c1a6ca69f7c48d99b6c5d62e56a445583fd4c08b (patch) | |
tree | f98fccc44829e3f52e585d6b41a1942aa52bd8b7 | |
parent | 7e49c006af9c4f7453c3578a7d4f1fc4d7bdf3ed (diff) | |
parent | 9e9e0e4d440383f617542e810a1115833eafd7bf (diff) | |
download | genenetwork2-c1a6ca69f7c48d99b6c5d62e56a445583fd4c08b.tar.gz |
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into acenteno
167 files changed, 7703 insertions, 6876 deletions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a36abc0a..f279a7e5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ on: jobs: unittest: runs-on: ubuntu-latest - container: bonfacekilz/python3-genenetwork2:ad741c1 + container: bonfacekilz/genenetwork2:latest steps: # First start with mariadb set then checkout. The checkout gives @@ -31,7 +31,7 @@ jobs: run: | /gn2-profile/bin/screen -dmLS redisconn /gn2-profile/bin/redis-server - # Redis is required by some of the tests 6379 + # Initialise the tables - name: Bootstrap tables run: | mysql -u root -e "SHOW DATABASES;" @@ -39,28 +39,27 @@ jobs: mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';" mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;" - - name: Run the unit tests + - name: Start Genenetwork as a Background Task run: | - env GN2_PROFILE=/gn2-profile \ + /gn2-profile/bin/screen -dm bash -c "env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py -c -m unittest discover -v + etc/default_settings.py" - - name: Start Genenetwork as a Background Task + - name: Run the unit tests run: | env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py& + etc/default_settings.py -c -m unittest discover -v - name: Test for Broken Links run: | - env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp\ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py -c /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py - + etc/default_settings.py -c \ + $PWD/test/requests/links_scraper/genelinks.py diff --git a/doc/database.org b/doc/database.org index 5107b660..d5462d4e 100644 --- a/doc/database.org +++ b/doc/database.org @@ -1339,7 +1339,8 @@ The SNP count info for the BXD is calculated like this startMb += stepMb #+end_src -select * from BXDSnpPosition limit 5; +: select * from BXDSnpPosition limit 5; + +------+-----------+-----------+----------+ | Chr | StrainId1 | StrainId2 | Mb | +------+-----------+-----------+----------+ @@ -1368,3 +1369,150 @@ mysql> select * from SnpSource limit 5; Empty set (0.00 sec) Hmmm. This is the test database. Then there are the plink files and VCF files. + +* Optimize SQL? + +We were facing some issues with slow queries. A query +was really slow on Penguin2: + +: time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > /dev/null +: real 0m13.082s +: user 0m0.292s +: sys 0m0.032s + +Runs in 1s on Tux01 and 13s on P2, why is that? The gist of it +was increasing an InnoDB cache size(!) + +Interestingly, Penguin2 is running InnoDB on a much slower storage. +It has more indices that Tux01(?!). Probably due to things we have +been trying to make the datatables faster. + +Meanwhile the query is one with many joins: + +#+begin_src sql +SELECT ProbeSet.Name,ProbeSetXRef.DataId, T4.value, T5.value, T6.value, T7.value, T8.value, T9.value, T10.value, T11.value, T12.value, T14.value, T15.value, T17.value, T18.value, T19.value, T20.value, T21.value, T22.value, T24.value, T25.value, T26.value, T28.value, T29.value, T30.value, T31.value, T35.value, T36.value, T37.value, T39.value, T98.value, T99.value, T100.value, T103.value, T487.value, T105.value, T106.value, T110.value FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) + left join ProbeSetData as T4 on T4.Id = ProbeSetXRef.DataId + and T4.StrainId=4 + (...) + left join ProbeSetData as T110 on T110.Id = ProbeSetXRef.DataId + and T110.StrainId=110 + WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + and ProbeSetFreeze.Name = 'HC_M2_0606_P' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId + order by ProbeSet.Id +#+end_src + +And is blazingly fast on Tux01 and (now) fast enough on Penguin2. + +First I checked the tables for indices and storage type. Next I +checked the difference in configuration. + +** Check tables + +Tables (ProbeSetData, ProbeSet, ProbeSetXRef, ProbeSetFreeze) + +*** ProbeSetData + +Same on Tux01 and P2: + +: show indexes from ProbeSetData ; + +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +|--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------| +| ProbeSetData | 0 | DataId | 1 | Id | A | 47769944 | NULL | NULL | | BTREE | | | +| ProbeSetData | 0 | DataId | 2 | StrainId | A | 5111384047 | NULL | NULL | | BTREE | | | + +*** ProbeSetFreeze + +Tux01 has less indexes than P2(!): + +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +|----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------| +| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 911 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 911 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | | +| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | | +: 4 rows in set (0.000 sec) + +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +|----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------| +| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 883 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 883 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 0 | Name | 1 | Name | A | 883 | NULL | NULL | YES | BTREE | | | +| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 883 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 1 | ShortName | 1 | ShortName | A | 883 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 1 | ProbeFreezeId | 1 | ProbeFreezeId | A | 441 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | | +| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | | +: 8 rows in set (0.00 sec) + +*** ProbeSet + +Identical indexes + +*** ProbeSetXRef + +Tux01 has less indexes than P2(!): + +: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ; +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +|--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------| +| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 885 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 47713039 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | | +: 4 rows in set (0.000 sec) + + + +: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ; +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | +|--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------| +| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 856 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 46412145 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 46412145 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | | +| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | | +: 5 rows in set (0.00 sec) + +** Check storage + +The database in Tux01 is mounted on NVME. On Penguin2 it +is slower SATA with RAID5. + +Also on Penguin2 the following tables are using InnoDB instead of +MyISAM + +#+begin_src sh +-rw-rw---- 1 mysql mysql 79691776 Oct 15 2019 AccessLog.ibd +-rw-rw---- 1 mysql mysql 196608 Oct 24 2019 Docs.ibd +-rw-rw---- 1 mysql mysql 63673729024 Jul 10 2020 GenoData.ibd +-rw-rw---- 1 mysql mysql 34787557376 Jul 9 2020 ProbeData.ibd +-rw-rw---- 1 mysql mysql 254690721792 Jul 10 2020 ProbeSetData.ibd +-rw-rw---- 1 mysql mysql 32103202816 Jul 9 2020 SnpAll.ibd +-rw-rw---- 1 mysql mysql 98304 May 6 2020 TraitMetadata.ibd +#+end_src + +This [[https://www.liquidweb.com/kb/mysql-performance-myisam-vs-innodb/][article]] suggests that myISAM will be faster for our use case. + +** Configuration + +There was one setting on Tux01 missing on P2 + +: +innodb_buffer_pool_size=1024M + +Running the same query twice (so you can see the warmup after +a restart of MariaDB) + +#+begin_src sh +penguin2:/etc$ time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > ~/test.out +real 0m4.253s +user 0m0.276s +sys 0m0.040s +penguin2:/etc$ time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > ~/test.out +real 0m2.633s +user 0m0.296s +sys 0m0.028s +#+end_src + +That is much better :) diff --git a/doc/docker-container.org b/doc/docker-container.org index ec91824a..ef0d71fc 100644 --- a/doc/docker-container.org +++ b/doc/docker-container.org @@ -32,13 +32,13 @@ First create the gn2 tar archive by running: env GUIX_PACKAGE_PATH="/home/bonface/projects/guix-bioinformatics:/home/bonface/projects/guix-past/modules" \ ./pre-inst-env guix pack --no-grafts\ -S /gn2-profile=/ \ - screen genenetwork2 + screen python2-genenetwork2 # For the Python 3 version: env GUIX_PACKAGE_PATH="/home/bonface/projects/guix-bioinformatics:/home/bonface/projects/guix-past/modules" \ ./pre-inst-env guix pack --no-grafts\ -S /gn2-profile=/ \ - screen python3-genenetwork2 + screen genenetwork2 #+end_src The output will look something similar to: @@ -59,16 +59,16 @@ RUN tar -xzf /tmp/gn2.tar.gz -C / && rm -f /tmp/gn2.tar.gz && \ Build the image(Note the fullstop at the end): -: sudo docker build -t python2-genenetwork2:latest -f Dockerfile . +: sudo docker build -t genenetwork2:latest -f Dockerfile . To load the image interactively you've just created: -: docker run -ti "python2-genenetwork2:latest" bash +: docker run -ti "genenetwork2:latest" bash Assuming you have a docker instance running, you could always run commands in it e.g: -: docker run "python2-genenetwork2:latest" python --version +: docker run "genenetwork2:latest" python --version * Pushing to DockerHub @@ -78,7 +78,7 @@ CI environment using Github Actions. To push to dockerhub, first get the image name by running =docker images=. Push to dockerhub using a command similar to: -: docker push bonfacekilz/python2-genenetwork2:latest +: docker push bonfacekilz/genenetwork2:latest Right now, we have 2 images on DockerHub: diff --git a/etc/default_settings.py b/etc/default_settings.py index 27522187..a194b10e 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -73,23 +73,24 @@ SMTP_PASSWORD = "UNKNOWN" # ---- Behavioural settings (defaults) note that logger and log levels can # be overridden at the module level and with enviroment settings -WEBSERVER_MODE = 'DEV' # Python webserver mode (DEBUG|DEV|PROD) +WEBSERVER_MODE = 'DEV' # Python webserver mode (DEBUG|DEV|PROD) WEBSERVER_BRANDING = None # Set the branding (nyi) WEBSERVER_DEPLOY = None # Deployment specifics (nyi) -WEBSERVER_URL = "http://localhost:"+str(SERVER_PORT)+"/" # external URL +WEBSERVER_URL = "http://localhost:" + str(SERVER_PORT) + "/" # external URL -LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL) +LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL) LOG_LEVEL_DEBUG = '0' # logger.debugf log level (0-5, 5 = show all) -LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls +LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls LOG_SQL_ALCHEMY = 'False' -LOG_BENCH = True # Log bench marks +LOG_BENCH = True # Log bench marks -USE_REDIS = True # REDIS caching (note that redis will be phased out) -USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls -HOME = os.environ['HOME'] +USE_REDIS = True # REDIS caching (note that redis will be phased out) +USE_GN_SERVER = 'False' # Use GN_SERVER SQL calls +HOME = os.environ['HOME'] # ---- Default locations -GENENETWORK_FILES = HOME+"/genotype_files" # base dir for all static data files +# base dir for all static data files +GENENETWORK_FILES = HOME + "/genotype_files" # ---- Path overrides for Genenetwork - the defaults are normally # picked up from Guix or in the HOME directory @@ -98,7 +99,7 @@ GENENETWORK_FILES = HOME+"/genotype_files" # base dir for all static data fil # PRIVATE_FILES = HOME+"/gn2_private_data" # private static data files (unused) # ---- Local path to JS libraries - for development modules (only) -JS_GN_PATH = os.environ['HOME']+"/genenetwork/javascript" +JS_GN_PATH = os.environ['HOME'] + "/genenetwork/javascript" # ---- GN2 Executables (overwrite for testing only) # PLINK_COMMAND = str.strip(os.popen("which plink2").read()) diff --git a/locustfile.py b/locustfile.py new file mode 100644 index 00000000..4abf3f1c --- /dev/null +++ b/locustfile.py @@ -0,0 +1,13 @@ +"""Load test a single trait page""" +from locust import HttpUser, task, between + + +class LoadTest(HttpUser): + wait_time = between(1, 2.5) + + @task + def fetch_trait(self): + """Fetch a single trait""" + # /api/v_pre1/gen_dropdown + self.client.get("/show_trait?trait_id=" + "1457545_at&dataset=HC_M2_0606_P") diff --git a/scripts/maintenance/datastructure.py b/scripts/maintenance/datastructure.py new file mode 100755 index 00000000..9f3e8b1e --- /dev/null +++ b/scripts/maintenance/datastructure.py @@ -0,0 +1,177 @@ +import utilities + +def get_probesetfreezes(inbredsetid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT ProbeSetFreeze.`Id`, ProbeSetFreeze.`Name`, ProbeSetFreeze.`FullName` + FROM ProbeSetFreeze, ProbeFreeze + WHERE ProbeSetFreeze.`ProbeFreezeId`=ProbeFreeze.`Id` + AND ProbeFreeze.`InbredSetId`=%s + """ + cursor.execute(sql, (inbredsetid)) + return cursor.fetchall() + +def get_probesetfreeze(probesetfreezeid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT ProbeSetFreeze.`Id`, ProbeSetFreeze.`Name`, ProbeSetFreeze.`FullName` + FROM ProbeSetFreeze + WHERE ProbeSetFreeze.`Id`=%s + """ + cursor.execute(sql, (probesetfreezeid)) + return cursor.fetchone() + +def get_strains(inbredsetid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT Strain.`Id`, Strain.`Name` + FROM StrainXRef, Strain + WHERE StrainXRef.`InbredSetId`=%s + AND StrainXRef.`StrainId`=Strain.`Id` + ORDER BY StrainXRef.`OrderId` + """ + cursor.execute(sql, (inbredsetid)) + return cursor.fetchall() + +def get_inbredset(probesetfreezeid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT InbredSet.`Id`, InbredSet.`Name`, InbredSet.`FullName` + FROM InbredSet, ProbeFreeze, ProbeSetFreeze + WHERE InbredSet.`Id`=ProbeFreeze.`InbredSetId` + AND ProbeFreeze.`Id`=ProbeSetFreeze.`ProbeFreezeId` + AND ProbeSetFreeze.`Id`=%s + """ + cursor.execute(sql, (probesetfreezeid)) + return cursor.fetchone() + +def get_species(inbredsetid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT Species.`Id`, Species.`Name`, Species.`MenuName`, Species.`FullName` + FROM InbredSet, Species + WHERE InbredSet.`Id`=%s + AND InbredSet.`SpeciesId`=Species.`Id` + """ + cursor.execute(sql, (inbredsetid)) + return cursor.fetchone() + +def get_genofreeze_byinbredsetid(inbredsetid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT GenoFreeze.`Id`, GenoFreeze.`Name`, GenoFreeze.`FullName`, GenoFreeze.`InbredSetId` + FROM GenoFreeze + WHERE GenoFreeze.`InbredSetId`=%s + """ + cursor.execute(sql, (inbredsetid)) + return cursor.fetchone() + +def get_nextdataid_genotype(): + cursor, con = utilities.get_cursor() + sql = """ + SELECT GenoData.`Id` + FROM GenoData + ORDER BY GenoData.`Id` DESC + LIMIT 1 + """ + cursor.execute(sql) + re = cursor.fetchone() + dataid = re[0] + dataid += 1 + return dataid + +def get_nextdataid_phenotype(): + cursor, con = utilities.get_cursor() + sql = """ + SELECT PublishData.`Id` + FROM PublishData + ORDER BY PublishData.`Id` DESC + LIMIT 1 + """ + cursor.execute(sql) + re = cursor.fetchone() + dataid = re[0] + dataid += 1 + return dataid + +def get_nextorderid_strainxref(inbredsetid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT StrainXRef.`OrderId` + FROM StrainXRef + WHERE StrainXRef.`InbredSetId`=%s + ORDER BY StrainXRef.`OrderId` DESC + LIMIT 1 + """ + cursor.execute(sql, (inbredsetid)) + re = cursor.fetchone() + if re: + orderid = re[0] + 1 + else: + orderid = 1 + return orderid + +def insert_strain(inbredsetid, strainname): + speciesid = get_species(inbredsetid)[0] + cursor, con = utilities.get_cursor() + sql = """ + INSERT INTO Strain + SET + Strain.`Name`=%s, + Strain.`Name2`=%s, + Strain.`SpeciesId`=%s + """ + cursor.execute(sql, (strainname, strainname, speciesid)) + +def insert_strainxref(inbredsetid, strainid): + orderid = get_nextorderid_strainxref(inbredsetid) + cursor, con = utilities.get_cursor() + sql = """ + INSERT INTO StrainXRef + SET + StrainXRef.`InbredSetId`=%s, + StrainXRef.`StrainId`=%s, + StrainXRef.`OrderId`=%s, + StrainXRef.`Used_for_mapping`=%s, + StrainXRef.`PedigreeStatus`=%s + """ + cursor.execute(sql, (inbredsetid, strainid, orderid, "N", None)) + +def get_strain(inbredsetid, strainname): + speciesid = get_species(inbredsetid)[0] + cursor, con = utilities.get_cursor() + sql = """ + SELECT Strain.`Id`, Strain.`Name` + FROM Strain + WHERE Strain.`SpeciesId`=%s + AND Strain.`Name` LIKE %s + """ + cursor.execute(sql, (speciesid, strainname)) + return cursor.fetchone() + +def get_strainxref(inbredsetid, strainid): + cursor, con = utilities.get_cursor() + sql = """ + SELECT StrainXRef.`StrainId` + FROM StrainXRef + WHERE StrainXRef.`InbredSetId`=%s + AND StrainXRef.`StrainId`=%s + """ + cursor.execute(sql, (inbredsetid, strainid)) + return cursor.fetchone() + +def get_strain_sure(inbredsetid, strainname, updatestrainxref=None): + strain = get_strain(inbredsetid, strainname) + if not strain: + insert_strain(inbredsetid, strainname) + strain = get_strain(inbredsetid, strainname) + strainxref = get_strainxref(inbredsetid, strain[0]) + if not strainxref and updatestrainxref: + insert_strainxref(inbredsetid, strain[0]) + return strain + +def get_strains_bynames(inbredsetid, strainnames, updatestrainxref=None): + strains = [] + for strainname in strainnames: + strains.append(get_strain_sure(inbredsetid, strainname, updatestrainxref)) + return strains diff --git a/scripts/maintenance/load_phenotypes.py b/scripts/maintenance/load_phenotypes.py index 759d2eec..aa02d0cd 100755 --- a/scripts/maintenance/load_phenotypes.py +++ b/scripts/maintenance/load_phenotypes.py @@ -1,3 +1,11 @@ +# Load Python3 environment with GN2 utilities: +# +# source /usr/local/guix-profiles/gn-latest-20210512/etc/profile +# +# and run +# +# python load_phenotypes.py [args...] + import sys import csv @@ -9,35 +17,34 @@ def main(argv): config = utilities.get_config(argv[1]) print("config:") for item in config.items('config'): - print(("\t%s" % (str(item)))) + print("\t%s" % (str(item))) # var inbredsetid = config.get('config', 'inbredsetid') - print(("inbredsetid: %s" % inbredsetid)) + print("inbredsetid: %s" % inbredsetid) species = datastructure.get_species(inbredsetid) speciesid = species[0] - print(("speciesid: %s" % speciesid)) + print("speciesid: %s" % speciesid) dataid = datastructure.get_nextdataid_phenotype() - print(("next data id: %s" % dataid)) + print("next data id: %s" % dataid) cursor, con = utilities.get_cursor() # datafile datafile = open(config.get('config', 'datafile'), 'r') phenotypedata = csv.reader(datafile, delimiter='\t', quotechar='"') - phenotypedata_head = next(phenotypedata) - print(("phenotypedata head:\n\t%s" % phenotypedata_head)) + phenotypedata_head = phenotypedata.next() + print("phenotypedata head:\n\t%s" % phenotypedata_head) strainnames = phenotypedata_head[1:] strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes") # metafile metafile = open(config.get('config', 'metafile'), 'r') phenotypemeta = csv.reader(metafile, delimiter='\t', quotechar='"') - phenotypemeta_head = next(phenotypemeta) - print(("phenotypemeta head:\n\t%s" % phenotypemeta_head)) - print() + phenotypemeta_head = phenotypemeta.next() + print("phenotypemeta head:\n\t%s" % phenotypemeta_head) # load for metarow in phenotypemeta: # - datarow_value = next(phenotypedata) - datarow_se = next(phenotypedata) - datarow_n = next(phenotypedata) + datarow_value = phenotypedata.next() + datarow_se = phenotypedata.next() + datarow_n = phenotypedata.next() # Phenotype sql = """ INSERT INTO Phenotype @@ -67,7 +74,7 @@ def main(argv): )) rowcount = cursor.rowcount phenotypeid = con.insert_id() - print(("INSERT INTO Phenotype: %d record: %d" % (rowcount, phenotypeid))) + print("INSERT INTO Phenotype: %d record: %d" % (rowcount, phenotypeid)) # Publication publicationid = None # reset pubmed_id = utilities.to_db_string(metarow[0], None) @@ -81,7 +88,7 @@ def main(argv): re = cursor.fetchone() if re: publicationid = re[0] - print(("get Publication record: %d" % publicationid)) + print("get Publication record: %d" % publicationid) if not publicationid: sql = """ INSERT INTO Publication @@ -109,7 +116,7 @@ def main(argv): )) rowcount = cursor.rowcount publicationid = con.insert_id() - print(("INSERT INTO Publication: %d record: %d" % (rowcount, publicationid))) + print("INSERT INTO Publication: %d record: %d" % (rowcount, publicationid)) # data for index, strain in enumerate(strains): # @@ -158,14 +165,14 @@ def main(argv): cursor.execute(sql, (inbredsetid, phenotypeid, publicationid, dataid, "")) rowcount = cursor.rowcount publishxrefid = con.insert_id() - print(("INSERT INTO PublishXRef: %d record: %d" % (rowcount, publishxrefid))) + print("INSERT INTO PublishXRef: %d record: %d" % (rowcount, publishxrefid)) # for loop next dataid += 1 - print() + print # release con.close() if __name__ == "__main__": - print(("command line arguments:\n\t%s" % sys.argv)) + print("command line arguments:\n\t%s" % sys.argv) main(sys.argv) print("exit successfully") diff --git a/scripts/maintenance/utilities.py b/scripts/maintenance/utilities.py new file mode 100644 index 00000000..886410c2 --- /dev/null +++ b/scripts/maintenance/utilities.py @@ -0,0 +1,89 @@ +import MySQLdb +import re +import configparser + +def get_cursor(): + host = 'tux.uthsc.edu' + user = 'webqtlout' + passwd = 'webqtlout' + db = 'db_webqtl' + con = MySQLdb.Connect(db=db, host=host, user=user, passwd=passwd) + cursor = con.cursor() + return cursor, con + +def clearspaces(s, default=None): + if s: + s = re.sub('\s+', ' ', s) + s = s.strip() + return s + else: + return default + +def to_dic(keys, values): + dic = {} + for i in range(len(keys)): + key = keys[i] + value = values[i] + dic[key] = value + return dic + +def overlap(dic1, dic2): + keys = [] + values1 = [] + values2 = [] + for key in dic1.keys(): + if key in dic2: + value1 = dic1[key] + value2 = dic2[key] + if value1 and value2: + keys.append(key) + values1.append(value1) + values2.append(value2) + return keys, values1, values2 + +def to_db_string(s, default): + if s: + s = s.strip() + if len(s) == 0: + return default + elif s == 'x': + return default + else: + return s + else: + return default + +def to_db_float(s, default): + if s: + s = s.strip() + if len(s) == 0: + return default + elif s == 'x': + return default + else: + try: + return float(s) + except: + return default + else: + return default + +def to_db_int(s, default): + if s: + s = s.strip() + if len(s) == 0: + return default + elif s == 'x': + return default + else: + try: + return int(s) + except: + return default + else: + return default + +def get_config(configfile): + config = configparser.ConfigParser() + config.read(configfile) + return config diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py index 12300f4a..52c13489 100644 --- a/test/requests/links_scraper/genelinks.py +++ b/test/requests/links_scraper/genelinks.py @@ -2,7 +2,6 @@ import re import requests import urllib3 import os -import logging from urllib.request import urlopen as uReq from bs4 import BeautifulSoup as soup @@ -26,7 +25,6 @@ def search_templates(): parsed_page = soup( open(file_path, encoding="utf8"), "html.parser") html_parsed_pages.append(parsed_page) - return html_parsed_pages @@ -34,7 +32,7 @@ def is_valid_link(url_link): try: result = urlparse(url_link) return all([result.scheme, result.netloc, result.path]) - except Exception as e: + except Exception: return False @@ -42,13 +40,10 @@ def test_link(link): print(f'Checking -->{link}') results = None try: - results = requests.get(link, verify=False, timeout=10) status_code = results.status_code - - except Exception as e: + except Exception: status_code = 408 - return int(status_code) > 403 @@ -56,14 +51,11 @@ def fetch_css_links(parsed_page): print("fetching css links") for link in parsed_page.findAll("link"): full_path = None - link_url = link.attrs.get("href") if is_valid_link(link_url): full_path = link_url - elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url): full_path = urljoin('http://localhost:5004/', link_url) - if full_path is not None: if test_link(full_path): BROKEN_LINKS.add(full_path) @@ -71,16 +63,13 @@ def fetch_css_links(parsed_page): def fetch_html_links(parsed_page): print("fetching a tags ") - for link in parsed_page.findAll("a"): full_path = None link_url = link.attrs.get("href") if re.match(r"^/", link_url): full_path = urljoin('http://localhost:5004/', link_url) - elif is_valid_link(link_url): full_path = link_url - if full_path is not None: if test_link(full_path): BROKEN_LINKS.add(full_path) @@ -92,8 +81,11 @@ def fetch_script_tags(parsed_page): js_link = link.attrs.get("src") if js_link is not None: if is_valid_link(js_link): - raise SystemExit("Failed,the library should be packaged in guix.\ - Please contact,http://genenetwork.org/ for more details") + raise SystemExit("Failed,the library should be " + "packaged in guix. " + "Please contact, " + "http://genenetwork.org/ " + "for more details") elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link): full_path = urljoin('http://localhost:5004/', js_link) @@ -102,11 +94,9 @@ def fetch_script_tags(parsed_page): def fetch_page_links(page_url): - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) html_page = uReq(page_url) parsed_page = soup(html_page, "html.parser") - fetch_script_tags(parsed_page=parsed_page) fetch_css_links(parsed_page=parsed_page) fetch_html_links(parsed_page=parsed_page) @@ -114,13 +104,10 @@ def fetch_page_links(page_url): def webpages_to_check(): pages = [f"http://localhost:{PORT}/"] - return pages if __name__ == '__main__': - # results = search_templates() - for page in webpages_to_check(): fetch_page_links(page) if len(BROKEN_LINKS) > 0: @@ -130,4 +117,5 @@ if __name__ == '__main__': if len(BROKEN_LINKS) > 0: raise SystemExit( - "The links Above are broken.Please contact genenetwork.org<<<<<<<<") + "The links Above are broken. " + "Please contact genenetwork.org<<<<<<<<") diff --git a/test/requests/test_forgot_password.py b/test/requests/test_forgot_password.py index 2bf34c5c..346524bc 100644 --- a/test/requests/test_forgot_password.py +++ b/test/requests/test_forgot_password.py @@ -1,5 +1,4 @@ import requests -from wqflask import user_manager from utility.elasticsearch_tools import get_user_by_unique_column from parameterized import parameterized from parametrized_test import ParametrizedTest @@ -27,8 +26,7 @@ class TestForgotPassword(ParametrizedTest): "password": "test_password", "password_confirm": "test_password" } - user_manager.basic_info = lambda : { "basic_info": "basic" } - user_manager.RegisterUser(data) + def testWithoutEmail(self): data = {"email_address": ""} diff --git a/test/requests/test_login_local.py b/test/requests/test_login_local.py index 808649ca..6691d135 100644 --- a/test/requests/test_login_local.py +++ b/test/requests/test_login_local.py @@ -1,5 +1,4 @@ import requests -from wqflask import user_manager from parameterized import parameterized from parametrized_test import ParametrizedTest @@ -19,8 +18,6 @@ class TestLoginLocal(ParametrizedTest): "password": "test_password", "password_confirm": "test_password" } - user_manager.basic_info = lambda : { "basic_info": "basic" } - user_manager.RegisterUser(data) @parameterized.expand([ diff --git a/webtests/browser_run.py b/webtests/browser_run.py deleted file mode 100644 index 7ee540b7..00000000 --- a/webtests/browser_run.py +++ /dev/null @@ -1,72 +0,0 @@ -__all__ = ('sleep', 'testmod', 'test') - -from doctest import testmod - -from time import sleep - -import selenium -from selenium import webdriver -from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException -from selenium.webdriver.common.keys import Keys - -class Test(object): - def __init__(self): - #self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') - self.browser = webdriver.Firefox() - - def get(self, url): - self.browser.get(url) - sleep(5) - self.title() - - def click(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - if el.text: - text = el.text.strip() - elif el.get_attribute("value"): - text = el.get_attribute("value").strip() - else: - text = "Notext" - el.click() - print("clicked:", text) - sleep(2) - - def click_option(self, xpath_selector, option_text): - el = self.browser.find_element_by_xpath(xpath_selector) - for option in el.find_elements_by_tag_name('option'): - if option.text == option_text: - option.click() # select() in earlier versions of webdriver - break - sleep(2) - - def enter_text(self, xpath_selector, text): - el = self.browser.find_element_by_xpath(xpath_selector) - sleep(10) - el.send_keys(text) - sleep(5) - # Just in case things get mangled by JavaScript, etc. we print the text for testing - self.get_text(xpath_selector) - - def get_text(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - text = el.text.strip() or el.get_attribute("value").strip() - print("text:", text) - - def get_element_style(self, xpath_selector): - el = self.browser.find_element_by_xpath(xpath_selector) - style = el.get_attribute("style").strip() - print("style:", style) - - def switch_window(self): - self.browser.switch_to_window(self.browser.window_handles[-1]) - sleep(2) - self.title() - sleep(2) - - - def title(self): - print("title:", self.browser.title) - - - -test = Test() diff --git a/webtests/chromedriver b/webtests/chromedriver Binary files differdeleted file mode 100644 index 754eb668..00000000 --- a/webtests/chromedriver +++ /dev/null diff --git a/webtests/correlation_matrix_test.py b/webtests/correlation_matrix_test.py deleted file mode 100644 index 97114890..00000000 --- a/webtests/correlation_matrix_test.py +++ /dev/null @@ -1,70 +0,0 @@ -""" - -Test Correlation matrix - ->>> test.get("http://genenetwork.org/") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') -text: grin2b - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Add 1430675_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td/input''') -clicked: HC_M2_0606_P::1430675_at - -Add 1442370_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td/input''') -clicked: HC_M2_0606_P::1442370_at - -Add 1457003_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[4]/td/input''') -clicked: HC_M2_0606_P::1457003_at - -Add 1422223_at ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[5]/td/input''') -clicked: HC_M2_0606_P::1422223_at - ->>> sleep(5) - -Add to collection page ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[1]/td/table/tbody/tr[1]/td[4]/a''') -clicked: Notext - ->>> sleep(5) - -A new window is created, so we switch to it ->>> test.switch_window() -title: BXD Trait Collection - -Select all records ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/a/img''') -clicked: Notext - -Click Matrix ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr/td[2]/a/img''') -clicked: Notext - -Another new window ->>> test.switch_window() -title: Correlation Matrix - -Sleep a bunch because this can take a while ->>> sleep(10) - -Ensure that the correlation between Trait3 (HC_M2_0606_P::1457003_at) and Trait4 (HC_M2_0606_P::1422223_at) is 0.608 ->>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/blockquote/table/tbody/tr[5]/td[5]/a/font''') -text: 0.608\n71 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/correlation_test.py b/webtests/correlation_test.py deleted file mode 100644 index 311bb847..00000000 --- a/webtests/correlation_test.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Test calculate correlations - ->>> test.get("http://genenetwork.org") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'ssh') -text: ssh - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the first result ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td[2]/a''') -clicked: 1455854_a_at - -A new window is created, so we switch to it ->>> test.switch_window() -title: Hippocampus M430v2 BXD 06/06 PDNN : 1455854_a_at: Display Trait - -Click on Calculate Correlations ->>> test.click('''//*[@id="title3"]''') -clicked: Calculate Correlations - -Click on Compute ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p[6]/table/tbody/tr/td/div/div/span/table/tbody/tr/td/input[3]''') -clicked: Compute - -Another new window ->>> test.switch_window() -title: Correlation - -Sleep a bunch because this can take a while ->>> sleep(25) - -Ensure the Sample rho is the exepcted 1.000 because it should be the same record ->>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr[2]/td/div/table/tbody/tr[2]/td[9]/a''') -text: 1.000 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/libpeerconnection.log b/webtests/libpeerconnection.log deleted file mode 100644 index e69de29b..00000000 --- a/webtests/libpeerconnection.log +++ /dev/null diff --git a/webtests/marker_regression_test.py b/webtests/marker_regression_test.py deleted file mode 100644 index 9b4a4acb..00000000 --- a/webtests/marker_regression_test.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Test calculate correlations - ->>> test.get("http://genenetwork.org") -title: GeneNetwork - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Liver mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') -text: grin2b - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the second result ->>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td[2]/a''') -clicked: 1431700_at_A - -A new window is created, so we switch to it ->>> test.switch_window() -title: GSE16780 UCLA Hybrid MDP Liver Affy HT M430A (Sep11) RMA : 1431700_at_A: Display Trait - -Click on Mapping Tools ->>> test.click('''//*[@id="title4"]''') -clicked: Mapping Tools - -Click on Marker Regression tab ->>> test.click('''//*[@id="mapping_tabs"]/ul/li[2]/a''') -clicked: Marker Regression - -Click on Compute ->>> test.click('''//*[@id="mappingtabs-2"]/span/table/tbody/tr[1]/td/input''') -clicked: Compute - -Another new window ->>> test.switch_window() -title: Genome Association Result - -Sleep a bunch because this can take a while ->>> sleep(60) - -Ensure that the LRS of the top record is the exepcted value ->>> test.get_text('''//*[@id="1"]/td[2]''') -text: 11.511 - -""" - -from browser_run import * - -testmod() diff --git a/webtests/show_trait_js_test.py b/webtests/show_trait_js_test.py deleted file mode 100644 index 34ffd3b7..00000000 --- a/webtests/show_trait_js_test.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test if JS is working on the show trait page - ->>> test.get("alexandria.uthsc.edu:89") -title: GeneNetwork - -Choose the species ->>> test.click_option('''//*[@id="species"]''', 'Human') - -Choose the group ->>> test.click_option('''//*[@id="cross"]''', 'Human Brain Transcriptome (Yale/Kavli)') - -Choose the type ->>> test.click_option('''//*[@id="tissue"]''', 'Orbital Prefrontal Cortex mRNA') - -Enter the Get Any ->>> test.enter_text('''//*[@id="tfor"]''', 'shh') -text: shh - -Search ->>> test.click('//*[@id="btsearch"]') -clicked: Search - -Choose the first result ->>> test.click('''//*[@id="KIN_YSM_OFC_0711::3081205"]/td[2]/a''') -clicked: 3081205 - -A new window is created, so we switch to it ->>> test.switch_window() -title: KIN/YSM Human OFC Affy Hu-Exon 1.0 ST (Jul11) Quantile : 3081205: Display Trait - -Check that the Calculate Correlations tab is closed (if javascript isn't working, it will be open) ->>> test.get_element_style('''//*[@id="sectionbody3"]''') -style: display: none; - -""" - -from browser_run import * - -testmod() diff --git a/webtests/test_runner.py b/webtests/test_runner.py deleted file mode 100644 index b5b590a6..00000000 --- a/webtests/test_runner.py +++ /dev/null @@ -1,26 +0,0 @@ -import unittest -import doctest -import glob - -tests = ("correlation_test", - "correlation_matrix_test", - "marker_regression_test", - "show_trait_js_test") - - -def main(): - tests = glob.glob("*_test.py") - - suite = unittest.TestSuite() - - for testname in tests: - test = testname.rsplit(".", 1)[0] - print("Test is:", test) - mod = __import__(test) - suite.addTest(doctest.DocTestSuite(mod)) - - runner = unittest.TextTestRunner() - runner.run(suite) - -if __name__ == '__main__': - main() diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py index 249195e2..ce8e60b8 100644 --- a/wqflask/base/GeneralObject.py +++ b/wqflask/base/GeneralObject.py @@ -62,5 +62,5 @@ class GeneralObject: return s def __eq__(self, other): - return (len(list(self.__dict__.keys())) == - len(list(other.__dict__.keys()))) + return (len(list(self.__dict__.keys())) + == len(list(other.__dict__.keys()))) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 178234fe..75ddf278 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -17,7 +17,10 @@ # at rwilliams@uthsc.edu and xzhou15@uthsc.edu # # This module is used by GeneNetwork project (www.genenetwork.org) - +from dataclasses import dataclass +from dataclasses import field +from dataclasses import InitVar +from typing import Optional, Dict from db.call import fetchall, fetchone, fetch1 from utility.logger import getLogger from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL @@ -59,7 +62,8 @@ logger = getLogger(__name__) DS_NAME_MAP = {} -def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_name=None): +def create_dataset(dataset_name, dataset_type=None, + get_samplelist=True, group_name=None): if dataset_name == "Temp": dataset_type = "Temp" @@ -74,11 +78,10 @@ def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_n return dataset_class(dataset_name, get_samplelist) +@dataclass class DatasetType: - - def __init__(self, redis_instance): - """Create a dictionary of samples where the value is set to Geno, -Publish or ProbeSet. E.g. + """Create a dictionary of samples where the value is set to Geno, + Publish or ProbeSet. E.g. {'AD-cases-controls-MyersGeno': 'Geno', 'AD-cases-controls-MyersPublish': 'Publish', @@ -89,21 +92,28 @@ Publish or ProbeSet. E.g. 'All Phenotypes': 'Publish', 'B139_K_1206_M': 'ProbeSet', 'B139_K_1206_R': 'ProbeSet' ... - + } """ + redis_instance: InitVar[Redis] + datasets: Optional[Dict] = field(init=False, default_factory=dict) + data: Optional[Dict] = field(init=False) + + def __post_init__(self, redis_instance): self.redis_instance = redis_instance - self.datasets = {} - data = self.redis_instance.get("dataset_structure") + data = redis_instance.get("dataset_structure") if data: self.datasets = json.loads(data) - else: # ZS: I don't think this should ever run unless Redis is emptied + else: + # ZS: I don't think this should ever run unless Redis is + # emptied try: data = json.loads(requests.get( - GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content) - for species in data['datasets']: - for group in data['datasets'][species]: - for dataset_type in data['datasets'][species][group]: - for dataset in data['datasets'][species][group][dataset_type]: + GN2_BASE_URL + "/api/v_pre1/gen_dropdown", + timeout=5).content) + for _species in data['datasets']: + for group in data['datasets'][_species]: + for dataset_type in data['datasets'][_species][group]: + for dataset in data['datasets'][_species][group][dataset_type]: short_dataset_name = dataset[1] if dataset_type == "Phenotypes": new_type = "Publish" @@ -112,15 +122,16 @@ Publish or ProbeSet. E.g. else: new_type = "ProbeSet" self.datasets[short_dataset_name] = new_type - except: + except Exception: # Do nothing pass - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set("dataset_structure", + json.dumps(self.datasets)) + self.data = data def set_dataset_key(self, t, name): - """If name is not in the object's dataset dictionary, set it, and update - dataset_structure in Redis - + """If name is not in the object's dataset dictionary, set it, and + update dataset_structure in Redis args: t: Type of dataset structure which can be: 'mrna_expr', 'pheno', 'other_pheno', 'geno' @@ -128,19 +139,20 @@ Publish or ProbeSet. E.g. """ sql_query_mapping = { - 'mrna_expr': ("""SELECT ProbeSetFreeze.Id FROM """ + - """ProbeSetFreeze WHERE ProbeSetFreeze.Name = "{}" """), - 'pheno': ("""SELECT InfoFiles.GN_AccesionId """ + - """FROM InfoFiles, PublishFreeze, InbredSet """ + - """WHERE InbredSet.Name = '{}' AND """ + - """PublishFreeze.InbredSetId = InbredSet.Id AND """ + - """InfoFiles.InfoPageName = PublishFreeze.Name"""), - 'other_pheno': ("""SELECT PublishFreeze.Name """ + - """FROM PublishFreeze, InbredSet """ + - """WHERE InbredSet.Name = '{}' AND """ + - """PublishFreeze.InbredSetId = InbredSet.Id"""), - 'geno': ("""SELECT GenoFreeze.Id FROM GenoFreeze WHERE """ + - """GenoFreeze.Name = "{}" """) + 'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM " + "ProbeSetFreeze WHERE " + "ProbeSetFreeze.Name = \"%s\" "), + 'pheno': ("SELECT InfoFiles.GN_AccesionId " + "FROM InfoFiles, PublishFreeze, InbredSet " + "WHERE InbredSet.Name = '%s' AND " + "PublishFreeze.InbredSetId = InbredSet.Id AND " + "InfoFiles.InfoPageName = PublishFreeze.Name"), + 'other_pheno': ("SELECT PublishFreeze.Name " + "FROM PublishFreeze, InbredSet " + "WHERE InbredSet.Name = '%s' AND " + "PublishFreeze.InbredSetId = InbredSet.Id"), + 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE " + "GenoFreeze.Name = \"%s\" ") } dataset_name_mapping = { @@ -154,22 +166,24 @@ Publish or ProbeSet. E.g. if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone() + + results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) return True - return None def __call__(self, name): - if name not in self.datasets: for t in ["mrna_expr", "pheno", "other_pheno", "geno"]: - # This has side-effects, with the end result being a truth-y value + # This has side-effects, with the end result being a + # truth-y value if(self.set_dataset_key(t, name)): break - return self.datasets.get(name, None) # Return None if name has not been set + # Return None if name has not been set + return self.datasets.get(name, None) # Do the intensive work at startup one time only @@ -204,12 +218,12 @@ def create_datasets_list(): if USE_REDIS: r.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL)) - r.expire(key, 60*60) + r.expire(key, 60 * 60) return datasets -class Markers(object): +class Markers: """Todo: Build in cacheing so it saves us reading the same file more than once""" def __init__(self, name): @@ -228,7 +242,8 @@ class Markers(object): for line in bimbam_fh: marker = {} marker['name'] = line.split(delimiter)[0].rstrip() - marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000 + marker['Mb'] = float(line.split(delimiter)[ + 1].rstrip()) / 1000000 marker['chr'] = line.split(delimiter)[2].rstrip() markers.append(marker) @@ -262,8 +277,6 @@ class Markers(object): elif isinstance(p_values, dict): filtered_markers = [] for marker in self.markers: - #logger.debug("marker[name]", marker['name']) - #logger.debug("p_values:", p_values) if marker['name'] in p_values: #logger.debug("marker {} IS in p_values".format(i)) marker['p_value'] = p_values[marker['name']] @@ -276,10 +289,6 @@ class Markers(object): marker['lrs_value'] = - \ math.log10(marker['p_value']) * 4.61 filtered_markers.append(marker) - # else: - #logger.debug("marker {} NOT in p_values".format(i)) - # self.markers.remove(marker) - #del self.markers[i] self.markers = filtered_markers @@ -306,13 +315,11 @@ class HumanMarkers(Markers): marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) - #logger.debug("markers is: ", pf(self.markers)) - def add_pvalues(self, p_values): super(HumanMarkers, self).add_pvalues(p_values) -class DatasetGroup(object): +class DatasetGroup: """ Each group has multiple datasets; each species has multiple groups. @@ -365,8 +372,8 @@ class DatasetGroup(object): def get_markers(self): def check_plink_gemma(): if flat_file_exists("mapping"): - MAPPING_PATH = flat_files("mapping")+"/" - if os.path.isfile(MAPPING_PATH+self.name+".bed"): + MAPPING_PATH = flat_files("mapping") + "/" + if os.path.isfile(MAPPING_PATH + self.name + ".bed"): return True return False @@ -412,7 +419,7 @@ class DatasetGroup(object): else: logger.debug("Cache not hit") - genotype_fn = locate_ignore_error(self.name+".geno", 'genotype') + genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype') if genotype_fn: self.samplelist = get_group_samplelists.get_samplelist( "geno", genotype_fn) @@ -421,7 +428,7 @@ class DatasetGroup(object): if USE_REDIS: r.set(key, json.dumps(self.samplelist)) - r.expire(key, 60*5) + r.expire(key, 60 * 5) def all_samples_ordered(self): result = [] @@ -520,7 +527,6 @@ def datasets(group_name, this_group=None): break if tissue_already_exists: - #logger.debug("dataset_menu:", dataset_menu[i]['datasets']) dataset_menu[i]['datasets'].append((dataset, dataset_short)) else: dataset_menu.append(dict(tissue=tissue_name, @@ -528,7 +534,7 @@ def datasets(group_name, this_group=None): if USE_REDIS: r.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) - r.expire(key, 60*5) + r.expire(key, 60 * 5) if this_group != None: this_group._datasets = dataset_menu @@ -537,7 +543,7 @@ def datasets(group_name, this_group=None): return dataset_menu -class DataSet(object): +class DataSet: """ DataSet class defines a dataset in webqtl, can be either Microarray, Published phenotype, genotype, or user input dataset(temp) @@ -569,15 +575,6 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) - def get_desc(self): - """Gets overridden later, at least for Temp...used by trait's get_given_name""" - return None - - # Delete this eventually - @property - def riset(): - Weve_Renamed_This_As_Group - def get_accession_id(self): if self.type == "Publish": results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where @@ -628,7 +625,7 @@ class DataSet(object): WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND ProbeFreeze.TissueId = Tissue.Id AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s') - """ % (query_args), "/dataset/"+self.name+".json", + """ % (query_args), "/dataset/" + self.name + ".json", lambda r: (r["id"], r["name"], r["full_name"], r["short_name"], r["data_scale"], r["tissue"]) ) @@ -651,6 +648,8 @@ class DataSet(object): "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass + + def get_trait_data(self, sample_list=None): if sample_list: self.samplelist = sample_list @@ -735,9 +734,6 @@ class PhenotypeDataSet(DataSet): DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' def setup(self): - - #logger.debug("IS A PHENOTYPEDATASET") - # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', @@ -1040,8 +1036,8 @@ class MrnaAssayDataSet(DataSet): else: description_display = this_trait.symbol - if (len(description_display) > 1 and description_display != 'N/A' and - len(target_string) > 1 and target_string != 'None'): + if (len(description_display) > 1 and description_display != 'N/A' + and len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index f1929518..8f8e2b0a 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -6,12 +6,14 @@ from utility import db_tools from utility import Bunch from utility.db_tools import escape +from gn3.db_utils import database_connector from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) -class MrnaAssayTissueData(object): + +class MrnaAssayTissueData: def __init__(self, gene_symbols=None): self.gene_symbols = gene_symbols @@ -20,7 +22,7 @@ class MrnaAssayTissueData(object): self.data = collections.defaultdict(Bunch) - query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean from TissueProbeSetXRef @@ -31,29 +33,31 @@ class MrnaAssayTissueData(object): # Due to the limit size of TissueProbeSetFreezeId table in DB, # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) if len(gene_symbols) == 0: - query += '''Symbol!='' and Symbol Is Not Null group by Symbol) + query += '''Symbol!='' and Symbol Is Not Null group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; ''' else: in_clause = db_tools.create_in_clause(gene_symbols) - #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower + # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower query += ''' Symbol in {} group by Symbol) as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol and t.Mean = x.maxmean; '''.format(in_clause) - results = g.db.execute(query).fetchall() - lower_symbols = [] + # lower_symbols = [] + lower_symbols = {} for gene_symbol in gene_symbols: + # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: - lower_symbols.append(gene_symbol.lower()) - + lower_symbols[gene_symbol.lower()] = True + results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - if symbol.lower() in lower_symbols: + if symbol is not None and lower_symbols.get(symbol.lower()): + symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -64,16 +68,16 @@ class MrnaAssayTissueData(object): self.data[symbol].probe_target_description = result.Probe_Target_Description ########################################################################### - #Input: cursor, symbolList (list), dataIdDict(Dict) - #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, + # Input: cursor, symbolList (list), dataIdDict(Dict) + # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair, # key is symbol, value is one list of expression values of one probeSet; - #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). - #Attention! All keys are lower case! + # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type). + # Attention! All keys are lower case! ########################################################################### def get_symbol_values_pairs(self): id_list = [self.data[symbol].data_id for symbol in self.data] - + symbol_values_dict = {} if len(id_list) > 0: @@ -82,11 +86,13 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: symbol_values_dict[result.Symbol.lower()] = [result.value] else: - symbol_values_dict[result.Symbol.lower()].append(result.value) + symbol_values_dict[result.Symbol.lower()].append( + result.value) return symbol_values_dict diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 2771d116..f303aabb 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -1,55 +1,66 @@ -import collections +from collections import OrderedDict +from dataclasses import dataclass +from dataclasses import InitVar +from typing import Optional, Dict +from flask import g -from flask import Flask, g +@dataclass +class TheSpecies: + """Data related to species.""" + dataset: Optional[Dict] = None + species_name: Optional[str] = None -from utility.logger import getLogger -logger = getLogger(__name__ ) - -class TheSpecies(object): - def __init__(self, dataset=None, species_name=None): - if species_name != None: - self.name = species_name + def __post_init__(self): + if self.species_name is not None: + self.name = self.species_name self.chromosomes = Chromosomes(species=self.name) else: - self.dataset = dataset self.chromosomes = Chromosomes(dataset=self.dataset) -class IndChromosome(object): - def __init__(self, name, length): - self.name = name - self.length = length + +@dataclass +class IndChromosome: + """Data related to IndChromosome""" + name: str + length: int @property def mb_length(self): - """Chromosome length in megabases""" + """Chromosome length in mega-bases""" return self.length / 1000000 -class Chromosomes(object): - def __init__(self, dataset=None, species=None): - self.chromosomes = collections.OrderedDict() - if species != None: - query = """ - Select - Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species - where - Chr_Length.SpeciesId = Species.SpeciesId AND - Species.Name = '%s' - Order by OrderId - """ % species.capitalize() - else: + +@dataclass +class Chromosomes: + """Data related to a chromosome""" + dataset: InitVar[Dict] = None + species: Optional[str] = None + + def __post_init__(self, dataset): + if self.species is None: self.dataset = dataset - query = """ - Select - Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet - where - Chr_Length.SpeciesId = InbredSet.SpeciesId AND - InbredSet.Name = '%s' - Order by OrderId - """ % self.dataset.group.name - logger.sql(query) + @property + def chromosomes(self): + """Lazily fetch the chromosomes""" + chromosomes = OrderedDict() + if self.species is not None: + query = ( + "SELECT Chr_Length.Name, Chr_Length.OrderId, Length " + "FROM Chr_Length, Species WHERE " + "Chr_Length.SpeciesId = Species.SpeciesId AND " + "Species.Name = " + "'%s' ORDER BY OrderId" % self.species.capitalize()) + else: + query = ( + "SELECT Chr_Length.Name, Chr_Length.OrderId, " + "Length FROM Chr_Length, InbredSet WHERE " + "Chr_Length.SpeciesId = InbredSet.SpeciesId AND " + "InbredSet.Name = " + "'%s' ORDER BY OrderId" % self.dataset.group.name) results = g.db.execute(query).fetchall() - for item in results: - self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) + chromosomes[item.OrderId] = IndChromosome( + item.Name, item.Length) + return chromosomes diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index df96d46e..d09cfd40 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -51,7 +51,7 @@ def create_trait(**kw): return None -class GeneralTrait(object): +class GeneralTrait: """ Trait class defines a trait in webqtl, can be either Microarray, Published phenotype, genotype, or user input trait @@ -337,74 +337,10 @@ def jsonable(trait): dataset_name=dataset.shortname, location=trait.location_repr ) - else: - return dict() - - -def jsonable_table_row(trait, dataset_name, index): - """Return a list suitable for json and intended to be displayed in a table - - Actual turning into json doesn't happen here though""" - - dataset = create_dataset(dataset_name) - - if dataset.type == "ProbeSet": - if trait.mean == "": - mean = "N/A" - else: - mean = "%.3f" % round(float(trait.mean), 2) - if trait.additive == "": - additive = "N/A" - else: - additive = "%.3f" % round(float(trait.additive), 2) - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', - index, - '<a href="/show_trait?trait_id=' + - str(trait.name)+'&dataset='+dataset.name + - '">'+str(trait.name)+'</a>', - trait.symbol, - trait.description_display, - trait.location_repr, - mean, - trait.LRS_score_repr, - trait.LRS_location_repr, - additive] - elif dataset.type == "Publish": - if trait.additive == "": - additive = "N/A" - else: - additive = "%.2f" % round(float(trait.additive), 2) - if trait.pubmed_id: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', - index, - '<a href="/show_trait?trait_id=' + - str(trait.name)+'&dataset='+dataset.name + - '">'+str(trait.name)+'</a>', - trait.description_display, - trait.authors, - '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>', - trait.LRS_score_repr, - trait.LRS_location_repr, - additive] - else: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', - index, - '<a href="/show_trait?trait_id=' + - str(trait.name)+'&dataset='+dataset.name + - '">'+str(trait.name)+'</a>', - trait.description_display, - trait.authors, - trait.pubmed_text, - trait.LRS_score_repr, - trait.LRS_location_repr, - additive] - elif dataset.type == "Geno": - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', - index, - '<a href="/show_trait?trait_id=' + - str(trait.name)+'&dataset='+dataset.name + - '">'+str(trait.name)+'</a>', - trait.location_repr] + elif dataset.name == "Temp": + return dict(name=trait.name, + dataset="Temp", + dataset_name="Temp") else: return dict() @@ -543,9 +479,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: description_display = trait.symbol - if (str(description_display or "") != "" and - description_display != 'N/A' and - str(target_string or "") != "" and target_string != 'None'): + if (str(description_display or "") != "" + and description_display != 'N/A' + and str(target_string or "") != "" and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template @@ -639,6 +575,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError(repr(trait.name) + - ' information is not found in the database.') + raise KeyError(repr(trait.name) + + ' information is not found in the database.') return trait diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index aa55470f..25b6cb8a 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -28,17 +28,20 @@ import utility.tools utility.tools.show_settings() + class webqtlCaseData: """one case data in one trait""" def __init__(self, name, value=None, variance=None, num_cases=None, name2=None): self.name = name - self.name2 = name2 # Other name (for traits like BXD65a) + # Other name (for traits like BXD65a) + self.name2 = name2 self.value = value # Trait Value self.variance = variance # Trait Variance self.num_cases = num_cases # Number of individuals/cases self.extra_attributes = None - self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) + # Set a sane default (can't be just "id" cause that's a reserved word) + self.this_id = None self.outlier = None # Not set to True/False until later def __repr__(self): @@ -78,4 +81,4 @@ class webqtlCaseData: def display_num_cases(self): if self.num_cases is not None: return "%s" % self.num_cases - return "x"
\ No newline at end of file + return "x" diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index bb8704a5..39947158 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -1,4 +1,4 @@ -#########################################' +# ' # Environment Variables - public # # Note: much of this needs to handled by the settings/environment @@ -10,35 +10,35 @@ from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR -#Debug Level -#1 for debug, mod python will reload import each time +# Debug Level +# 1 for debug, mod python will reload import each time DEBUG = 1 -#USER privilege -USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4} +# USER privilege +USERDICT = {'guest': 1, 'user': 2, 'admin': 3, 'root': 4} -#Set privileges +# Set privileges SUPER_PRIVILEGES = {'data': 'edit', 'metadata': 'edit', 'admin': 'edit-admins'} DEFAULT_PRIVILEGES = {'data': 'view', 'metadata': 'view', 'admin': 'not-admin'} -#minimum number of informative strains +# minimum number of informative strains KMININFORMATIVE = 5 -#Daily download limit from one IP +# Daily download limit from one IP DAILYMAXIMUM = 1000 -#maximum LRS value +# maximum LRS value MAXLRS = 460.0 -#MINIMUM Database public value +# MINIMUM Database public value PUBLICTHRESH = 0 -#EXTERNAL LINK ADDRESSES +# EXTERNAL LINK ADDRESSES PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' -GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s" +GENOMEBROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s" NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s" GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" @@ -56,7 +56,7 @@ GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s" ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s" EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s" WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s" -ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" +ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s' PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s" OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s" @@ -71,13 +71,13 @@ RRID_RAT_URL = "https://rgd.mcw.edu/rgdweb/report/strain/main.html?id=%s" # want to reach this base dir assert_writable_dir(TEMPDIR) -TMPDIR = mk_dir(TEMPDIR+'/gn2/') +TMPDIR = mk_dir(TEMPDIR + '/gn2/') assert_writable_dir(TMPDIR) -CACHEDIR = mk_dir(TMPDIR+'/cache/') +CACHEDIR = mk_dir(TMPDIR + '/cache/') # We can no longer write into the git tree: -GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'generated/') -GENERATED_TEXT_DIR = mk_dir(TMPDIR+'generated_text/') +GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/') +GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/') # Make sure we have permissions to access these assert_writable_dir(CACHEDIR) @@ -85,12 +85,12 @@ assert_writable_dir(GENERATED_IMAGE_DIR) assert_writable_dir(GENERATED_TEXT_DIR) # Flat file directories -GENODIR = flat_files('genotype')+'/' +GENODIR = flat_files('genotype') + '/' assert_dir(GENODIR) # assert_dir(GENODIR+'bimbam') # for gemma # JSON genotypes are OBSOLETE -JSON_GENODIR = flat_files('genotype/json')+'/' +JSON_GENODIR = flat_files('genotype/json') + '/' if not valid_path(JSON_GENODIR): # fall back on old location (move the dir, FIXME) JSON_GENODIR = flat_files('json') @@ -98,4 +98,4 @@ if not valid_path(JSON_GENODIR): # Are we using the following...? PORTADDR = "http://50.16.251.170" INFOPAGEHREF = '/dbdoc/%s.html' -CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' +CGIDIR = '/webqtl/' # XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' diff --git a/wqflask/db/call.py b/wqflask/db/call.py index 0971d2a2..1fe0772b 100644 --- a/wqflask/db/call.py +++ b/wqflask/db/call.py @@ -4,7 +4,9 @@ from flask import g import string try: # Python2 support - import urllib.request, urllib.error, urllib.parse + import urllib.request + import urllib.error + import urllib.parse except: import urllib2 import json @@ -12,10 +14,11 @@ from utility.tools import USE_GN_SERVER, LOG_SQL, GN_SERVER_URL from utility.benchmark import Bench from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) # from inspect import stack + def fetch1(query, path=None, func=None): """Fetch one result as a Tuple using either a SQL query or the URI path to GN_SERVER (when USE_GN_SERVER is True). Apply func to @@ -35,6 +38,7 @@ GN_SERVER result when set (which should return a Tuple) else: return fetchone(query) + def fetchone(query): """Return tuple containing one row by calling SQL directly (the original fetchone, but with logging) @@ -46,6 +50,7 @@ original fetchone, but with logging) return res.fetchone() return logger.sql(query, helper) + def fetchall(query): """Return row iterator by calling SQL directly (the original fetchall, but with logging) @@ -57,6 +62,7 @@ original fetchall, but with logging) return res.fetchall() return logger.sql(query, helper) + def gn_server(path): """Return JSON record by calling GN_SERVER @@ -64,9 +70,9 @@ def gn_server(path): with Bench("GN_SERVER", LOG_SQL): res = () try: - res = urllib.request.urlopen(GN_SERVER_URL+path) + res = urllib.request.urlopen(GN_SERVER_URL + path) except: - res = urllib2.urlopen(GN_SERVER_URL+path) + res = urllib2.urlopen(GN_SERVER_URL + path) rest = res.read() res2 = json.loads(rest) logger.debug(res2) diff --git a/wqflask/db/gn_server.py b/wqflask/db/gn_server.py index da224112..f9b01658 100644 --- a/wqflask/db/gn_server.py +++ b/wqflask/db/gn_server.py @@ -3,7 +3,8 @@ from db.call import gn_server from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def menu_main(): return gn_server("/int/menu/main.json") diff --git a/wqflask/db/webqtlDatabaseFunction.py b/wqflask/db/webqtlDatabaseFunction.py index 2805febd..9ec650a4 100644 --- a/wqflask/db/webqtlDatabaseFunction.py +++ b/wqflask/db/webqtlDatabaseFunction.py @@ -21,27 +21,18 @@ # This module is used by GeneNetwork project (www.genenetwork.org) from db.call import fetch1 -from utility.tools import USE_GN_SERVER -from utility.logger import getLogger -logger = getLogger(__name__ ) - -########################################################################### -#output: cursor instance -#function: connect to database and return cursor instance -########################################################################### def retrieve_species(group): """Get the species of a group (e.g. returns string "mouse" on "BXD" """ - result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % (group), "/cross/"+group+".json", lambda r: (r["species"],))[0] - logger.debug("retrieve_species result:", result) + result = fetch1("select Species.Name from Species, InbredSet where InbredSet.Name = '%s' and InbredSet.SpeciesId = Species.Id" % ( + group), "/cross/" + group + ".json", lambda r: (r["species"],))[0] return result def retrieve_species_id(group): - - result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % (group), "/cross/"+group+".json", lambda r: (r["species_id"],))[0] - logger.debug("retrieve_species_id result:", result) + result = fetch1("select SpeciesId from InbredSet where Name = '%s'" % ( + group), "/cross/" + group + ".json", lambda r: (r["species_id"],))[0] return result diff --git a/wqflask/maintenance/convert_dryad_to_bimbam.py b/wqflask/maintenance/convert_dryad_to_bimbam.py index 12ce35e9..18fbb8a1 100644 --- a/wqflask/maintenance/convert_dryad_to_bimbam.py +++ b/wqflask/maintenance/convert_dryad_to_bimbam.py @@ -41,7 +41,7 @@ def read_dryad_file(filename): return geno_rows - #for i, marker in enumerate(marker_list): + # for i, marker in enumerate(marker_list): # this_row = [] # this_row.append(marker) # this_row.append("X") @@ -52,18 +52,21 @@ def read_dryad_file(filename): # this_row.append(line.split(" ")[i+2]) # print("row: " + str(i)) # geno_rows.append(this_row) - # - #return geno_rows + # + # return geno_rows + def write_bimbam_files(geno_rows): with open('/home/zas1024/cfw_data/CFW_geno.txt', 'w') as geno_fh: for row in geno_rows: geno_fh.write(", ".join(row) + "\n") + def convert_dryad_to_bimbam(filename): geno_file_rows = read_dryad_file(filename) write_bimbam_files(geno_file_rows) -if __name__=="__main__": + +if __name__ == "__main__": input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" convert_dryad_to_bimbam(input_filename) diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py index d49742f2..078be529 100644 --- a/wqflask/maintenance/convert_geno_to_bimbam.py +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -20,9 +20,12 @@ import simplejson as json from pprint import pformat as pf -class EmptyConfigurations(Exception): pass -class Marker(object): +class EmptyConfigurations(Exception): + pass + + +class Marker: def __init__(self): self.name = None self.chr = None @@ -30,7 +33,8 @@ class Marker(object): self.Mb = None self.genotypes = [] -class ConvertGenoFile(object): + +class ConvertGenoFile: def __init__(self, input_file, output_files): self.input_file = input_file @@ -52,7 +56,7 @@ class ConvertGenoFile(object): '@pat': "0", '@het': "0.5", '@unk': "NA" - } + } self.configurations = {} self.input_fh = open(self.input_file) @@ -80,13 +84,14 @@ class ConvertGenoFile(object): genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) else: this_marker.genotypes.append("NA") self.markers.append(this_marker.__dict__) - self.write_to_bimbam() + self.write_to_bimbam() def write_to_bimbam(self): with open(self.output_files[0], "w") as geno_fh: @@ -103,9 +108,11 @@ class ConvertGenoFile(object): with open(self.output_files[2], "w") as snp_fh: for marker in self.markers: if self.mb_exists: - snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['Mb']) * 1000000)) + ", " + marker['chr'] + "\n") else: - snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") + snp_fh.write( + marker['name'] + ", " + str(int(float(marker['cM']) * 1000000)) + ", " + marker['chr'] + "\n") def get_sample_list(self, row_contents): self.sample_list = [] @@ -119,7 +126,7 @@ class ConvertGenoFile(object): self.sample_list = row_contents[3:] else: self.sample_list = row_contents[2:] - + def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row @@ -157,10 +164,14 @@ class ConvertGenoFile(object): group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") - pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") - snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") - output_files = [geno_output_file, pheno_output_file, snp_output_file] + geno_output_file = os.path.join( + new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join( + new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join( + new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, + pheno_output_file, snp_output_file] print("%s -> %s" % ( os.path.join(old_directory, input_file), geno_output_file)) convertob = ConvertGenoFile(input_file, output_files) @@ -173,17 +184,18 @@ class ConvertGenoFile(object): print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break -if __name__=="__main__": + +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/bimbam""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) + # ConvertGenoFiles(Geno_Directory) diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 544e2fd1..db65a11f 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -55,16 +55,17 @@ from pprint import pformat as pf #conn = Engine.connect() + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info @@ -119,21 +120,23 @@ def get_types(groups): else: if not phenotypes_exist(group_name) and not genotypes_exist(group_name): types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) - else: #ZS: This whole else statement might be unnecessary, need to check + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) + else: # ZS: This whole else statement might be unnecessary, need to check types_list = build_types(species, group_name) if len(types_list) > 0: types[species][group_name] = types_list else: types[species].pop(group_name, None) - groups[species] = tuple(group for group in groups[species] if group[0] != group_name) + groups[species] = tuple( + group for group in groups[species] if group[0] != group_name) return types def phenotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from PublishFreeze - where PublishFreeze.Name = '%s'""" % (group_name+"Publish")) + where PublishFreeze.Name = '%s'""" % (group_name + "Publish")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -143,10 +146,11 @@ def phenotypes_exist(group_name): else: return False + def genotypes_exist(group_name): #print("group_name:", group_name) Cursor.execute("""select Name from GenoFreeze - where GenoFreeze.Name = '%s'""" % (group_name+"Geno")) + where GenoFreeze.Name = '%s'""" % (group_name + "Geno")) results = Cursor.fetchone() #print("RESULTS:", results) @@ -156,6 +160,7 @@ def genotypes_exist(group_name): else: return False + def build_types(species, group): """Fetches tissues @@ -184,6 +189,7 @@ def build_types(species, group): return results + def get_datasets(types): """Build datasets list""" datasets = {} @@ -246,7 +252,7 @@ def build_datasets(species, group, type_name): dataset_text = "%s Genotypes" % group datasets.append((dataset_id, dataset_value, dataset_text)) - else: # for mRNA expression/ProbeSet + else: # for mRNA expression/ProbeSet Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and @@ -308,6 +314,7 @@ def _test_it(): datasets = build_datasets("Mouse", "BXD", "Hippocampus") #print("build_datasets:", pf(datasets)) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py index 60257b28..9f01d094 100644 --- a/wqflask/maintenance/generate_kinship_from_bimbam.py +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -13,14 +13,17 @@ sys.path.append("..") import os import glob -class GenerateKinshipMatrices(object): + +class GenerateKinshipMatrices: def __init__(self, group_name, geno_file, pheno_file): self.group_name = group_name self.geno_file = geno_file self.pheno_file = pheno_file - + def generate_kinship(self): - gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name + gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + \ + " -p " + self.pheno_file + \ + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name print("command:", gemma_command) os.system(gemma_command) @@ -33,9 +36,12 @@ class GenerateKinshipMatrices(object): group_name = ".".join(input_file.split('.')[:-1]) if group_name == "HSNIH-Palmer": continue - geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt") - pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt") - convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file) + geno_input_file = os.path.join( + bimbam_dir, group_name + "_geno.txt") + pheno_input_file = os.path.join( + bimbam_dir, group_name + "_pheno.txt") + convertob = GenerateKinshipMatrices( + group_name, geno_input_file, pheno_input_file) try: convertob.generate_kinship() except EmptyConfigurations as why: @@ -46,15 +52,15 @@ class GenerateKinshipMatrices(object): print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - -if __name__=="__main__": + + +if __name__ == "__main__": Geno_Directory = """/export/local/home/zas1024/genotype_files/genotype/""" Bimbam_Directory = """/export/local/home/zas1024/genotype_files/genotype/bimbam/""" GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) - - #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD + + # ./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index b1e41e9a..e964c8ed 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -23,10 +23,12 @@ def get_cursor(): cursor = con.cursor() return cursor + def show_progress(process, counter): if counter % 1000 == 0: print("{}: {}".format(process, counter)) + def get_strains(cursor): cursor.execute("""select Strain.Name from Strain, StrainXRef, InbredSet @@ -42,6 +44,7 @@ def get_strains(cursor): return strains + def get_probeset_vals(cursor, dataset_name): cursor.execute(""" select ProbeSet.Id, ProbeSet.Name from ProbeSetXRef, @@ -77,6 +80,7 @@ def get_probeset_vals(cursor, dataset_name): return probeset_vals + def trim_strains(strains, probeset_vals): trimmed_strains = [] #print("probeset_vals is:", pf(probeset_vals)) @@ -89,6 +93,7 @@ def trim_strains(strains, probeset_vals): print("trimmed_strains:", pf(trimmed_strains)) return trimmed_strains + def write_data_matrix_file(strains, probeset_vals, filename): with open(filename, "wb") as fh: csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) @@ -103,10 +108,12 @@ def write_data_matrix_file(strains, probeset_vals, filename): csv_writer.writerow(row_data) show_progress("Writing", counter) + def main(): - filename = os.path.expanduser("~/gene/wqflask/maintenance/" + - "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + - "(Oct08)_RankInv_Beta.txt") + filename = os.path.expanduser( + "~/gene/wqflask/maintenance/" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + "(Oct08)_RankInv_Beta.txt") dataset_name = "Eye_AXBXA_1008_RankInv" cursor = get_cursor() @@ -117,5 +124,6 @@ def main(): trimmed_strains = trim_strains(strains, probeset_vals) write_data_matrix_file(trimmed_strains, probeset_vals, filename) + if __name__ == '__main__': main() diff --git a/wqflask/maintenance/geno_to_json.py b/wqflask/maintenance/geno_to_json.py index 7e7fd241..32e0e34b 100644 --- a/wqflask/maintenance/geno_to_json.py +++ b/wqflask/maintenance/geno_to_json.py @@ -25,11 +25,12 @@ from pprint import pformat as pf #from utility.tools import flat_files -class EmptyConfigurations(Exception): pass - +class EmptyConfigurations(Exception): + pass -class Marker(object): + +class Marker: def __init__(self): self.name = None self.chr = None @@ -37,23 +38,24 @@ class Marker(object): self.Mb = None self.genotypes = [] -class ConvertGenoFile(object): + +class ConvertGenoFile: def __init__(self, input_file, output_file): - + self.input_file = input_file self.output_file = output_file - + self.mb_exists = False self.cm_exists = False self.markers = [] - + self.latest_row_pos = None self.latest_col_pos = None - + self.latest_row_value = None self.latest_col_value = None - + def convert(self): self.haplotype_notation = { @@ -61,24 +63,23 @@ class ConvertGenoFile(object): '@pat': "0", '@het': "0.5", '@unk': "NA" - } - + } + self.configurations = {} #self.skipped_cols = 3 - - #if self.input_file.endswith(".geno.gz"): + + # if self.input_file.endswith(".geno.gz"): # print("self.input_file: ", self.input_file) # self.input_fh = gzip.open(self.input_file) - #else: + # else: self.input_fh = open(self.input_file) - + with open(self.output_file, "w") as self.output_fh: - #if self.file_type == "geno": + # if self.file_type == "geno": self.process_csv() - #elif self.file_type == "snps": + # elif self.file_type == "snps": # self.process_snps_file() - def process_csv(self): for row_count, row in enumerate(self.process_rows()): row_items = row.split("\t") @@ -100,31 +101,31 @@ class ConvertGenoFile(object): genotypes = row_items[2:] for item_count, genotype in enumerate(genotypes): if genotype.upper() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper()]) + this_marker.genotypes.append( + self.configurations[genotype.upper()]) else: this_marker.genotypes.append("NA") - - #print("this_marker is:", pf(this_marker.__dict__)) - #if this_marker.chr == "14": + + #print("this_marker is:", pf(this_marker.__dict__)) + # if this_marker.chr == "14": self.markers.append(this_marker.__dict__) with open(self.output_file, 'w') as fh: json.dump(self.markers, fh, indent=" ", sort_keys=True) - - # print('configurations:', str(configurations)) - #self.latest_col_pos = item_count + self.skipped_cols - #self.latest_col_value = item - - #if item_count != 0: - # self.output_fh.write(" ") - #self.output_fh.write(self.configurations[item.upper()]) - - #self.output_fh.write("\n") + # print('configurations:', str(configurations)) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + # if item_count != 0: + # self.output_fh.write(" ") + # self.output_fh.write(self.configurations[item.upper()]) + + # self.output_fh.write("\n") def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): - #if self.input_file.endswith(".geno.gz"): + # if self.input_file.endswith(".geno.gz"): # print("row: ", row) self.latest_row_value = row # Take care of headers @@ -171,26 +172,25 @@ class ConvertGenoFile(object): print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, - convertob.latest_col_pos)) + convertob.latest_col_pos)) print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break - - #def process_snps_file(cls, snps_file, new_directory): + + # def process_snps_file(cls, snps_file, new_directory): # output_file = os.path.join(new_directory, "mouse_families.json") # print("%s -> %s" % (snps_file, output_file)) # convertob = ConvertGenoFile(input_file, output_file) - -if __name__=="__main__": +if __name__ == "__main__": Old_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype""" New_Geno_Directory = """/export/local/home/zas1024/gn2-zach/genotype_files/genotype/json""" #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") - #convertob.convert() + # convertob.convert() ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) - #ConvertGenoFiles(Geno_Directory) - + # ConvertGenoFiles(Geno_Directory) + #process_csv(Input_File, Output_File) diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index 3f9d0278..0a450d3f 100644 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -4,12 +4,14 @@ import gzip from base import webqtlConfig + def get_samplelist(file_type, geno_file): if file_type == "geno": return get_samplelist_from_geno(geno_file) elif file_type == "plink": return get_samplelist_from_plink(geno_file) + def get_samplelist_from_geno(genofilename): if os.path.isfile(genofilename + '.gz'): genofilename += '.gz' @@ -33,6 +35,7 @@ def get_samplelist_from_geno(genofilename): samplelist = headers[3:] return samplelist + def get_samplelist_from_plink(genofilename): genofile = open(genofilename) diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py index b24ce4f1..9d12da8a 100644 --- a/wqflask/maintenance/print_benchmark.py +++ b/wqflask/maintenance/print_benchmark.py @@ -5,7 +5,7 @@ import time from pprint import pformat as pf -class TheCounter(object): +class TheCounter: Counters = {} def __init__(self): @@ -15,15 +15,18 @@ class TheCounter(object): self.time_took = time.time() - start_time TheCounter.Counters[self.__class__.__name__] = self.time_took + class PrintAll(TheCounter): def print_it(self, counter): print(counter) + class PrintSome(TheCounter): def print_it(self, counter): if counter % 1000 == 0: print(counter) + class PrintNone(TheCounter): def print_it(self, counter): pass @@ -37,5 +40,6 @@ def new_main(): print(pf(TheCounter.Counters)) + if __name__ == '__main__': new_main() diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 701b2b50..0cc963e5 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -14,42 +14,48 @@ from wqflask import app from utility.elasticsearch_tools import get_elasticsearch_connection from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info + def create_dataframe(input_file): with open(input_file) as f: ncols = len(f.readline().split("\t")) - input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) + input_array = np.loadtxt(open( + input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols))) return pd.DataFrame(input_array) -#This function taken from https://github.com/ShawnLYU/Quantile_Normalize +# This function taken from https://github.com/ShawnLYU/Quantile_Normalize + + def quantileNormalize(df_input): df = df_input.copy() - #compute rank + # compute rank dic = {} for col in df: - dic.update({col : sorted(df[col])}) + dic.update({col: sorted(df[col])}) sorted_df = pd.DataFrame(dic) - rank = sorted_df.mean(axis = 1).tolist() - #sort + rank = sorted_df.mean(axis=1).tolist() + # sort for col in df: t = np.searchsorted(np.sort(df[col]), df[col]) df[col] = [rank[i] for i in t] return df + def set_data(dataset_name): orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" @@ -64,10 +70,10 @@ def set_data(dataset_name): trait_name = line1.split('\t')[0] for i, sample in enumerate(sample_names): this_sample = { - "name": sample, - "value": line1.split('\t')[i+1], - "qnorm": line2.split('\t')[i+1] - } + "name": sample, + "value": line1.split('\t')[i + 1], + "qnorm": line2.split('\t')[i + 1] + } sample_list.append(this_sample) query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet @@ -95,13 +101,14 @@ def set_data(dataset_name): } } + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - #es = Elasticsearch([{ + # es = Elasticsearch([{ # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT - #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None + # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None es = get_elasticsearch_connection(for_user=False) @@ -116,8 +123,8 @@ if __name__ == '__main__': success, _ = bulk(es, set_data(sys.argv[1])) response = es.search( - index = "traits", doc_type = "trait", body = { - "query": { "match": { "name": "ENSMUSG00000028982" } } + index="traits", doc_type="trait", body={ + "query": {"match": {"name": "ENSMUSG00000028982"}} } ) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 4177c124..0f472494 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -37,20 +37,22 @@ import urllib.parse from utility.logger import getLogger logger = getLogger(__name__) + def parse_db_uri(): """Converts a database URI to the db name, host name, user name, and password""" parsed_uri = urllib.parse.urlparse(SQL_URI) db_conn_info = dict( - db = parsed_uri.path[1:], - host = parsed_uri.hostname, - user = parsed_uri.username, - passwd = parsed_uri.password) + db=parsed_uri.path[1:], + host=parsed_uri.hostname, + user=parsed_uri.username, + passwd=parsed_uri.password) print(db_conn_info) return db_conn_info + def insert_probeset_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -63,20 +65,21 @@ def insert_probeset_resources(default_owner_id): resource_ob = {} resource_ob['name'] = resource[1] resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-probeset" if resource[2] < 1 and resource[3] > 0: - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} resource_ob['group_masks'] = {} add_resource(resource_ob, update=False) + def insert_publish_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -97,12 +100,12 @@ def insert_publish_resources(default_owner_id): else: resource_ob['name'] = str(resource[0]) resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[1]) , - "trait" : str(resource[0])} + resource_ob['data'] = {"dataset": str(resource[1]), + "trait": str(resource[0])} resource_ob['type'] = "dataset-publish" - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} resource_ob['group_masks'] = {} @@ -110,6 +113,7 @@ def insert_publish_resources(default_owner_id): else: continue + def insert_geno_resources(default_owner_id): current_resources = Redis.hgetall("resources") Cursor.execute(""" SELECT @@ -125,20 +129,21 @@ def insert_geno_resources(default_owner_id): resource_ob['owner_id'] = "c5ce8c56-78a6-474f-bcaf-7129d97f56ae" else: resource_ob['owner_id'] = default_owner_id - resource_ob['data'] = { "dataset" : str(resource[0]) } + resource_ob['data'] = {"dataset": str(resource[0])} resource_ob['type'] = "dataset-geno" if resource[2] < 1: - resource_ob['default_mask'] = { "data": "view", - "metadata": "view", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "view", + "metadata": "view", + "admin": "not-admin"} else: - resource_ob['default_mask'] = { "data": "no-access", - "metadata": "no-access", - "admin": "not-admin"} + resource_ob['default_mask'] = {"data": "no-access", + "metadata": "no-access", + "admin": "not-admin"} resource_ob['group_masks'] = {} add_resource(resource_ob, update=False) + def insert_resources(default_owner_id): current_resources = get_resources() print("START") @@ -149,6 +154,7 @@ def insert_resources(default_owner_id): insert_probeset_resources(default_owner_id) print("AFTER PROBESET") + def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" @@ -158,6 +164,7 @@ def main(): insert_resources(owner_id) + if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() diff --git a/wqflask/run_gunicorn.py b/wqflask/run_gunicorn.py index 58108e03..03f310eb 100644 --- a/wqflask/run_gunicorn.py +++ b/wqflask/run_gunicorn.py @@ -14,9 +14,11 @@ from utility.startup_config import app_config app_config() + @app.route("/gunicorn") def hello(): return "<h1 style='color:blue'>Hello There!</h1>" + if __name__ == "__main__": app.run(host='0.0.0.0') diff --git a/wqflask/tests/unit/base/test_data_set.py b/wqflask/tests/unit/base/test_data_set.py index 96563a16..66ad361d 100644 --- a/wqflask/tests/unit/base/test_data_set.py +++ b/wqflask/tests/unit/base/test_data_set.py @@ -31,14 +31,12 @@ class TestDataSetTypes(unittest.TestCase): def tearDown(self): self.app_context.pop() - @mock.patch('base.data_set.g') - def test_data_set_type(self, db_mock): + def test_data_set_type(self): """Test that DatasetType returns correctly if the Redis Instance is not empty and the name variable exists in the dictionary """ with app.app_context(): - db_mock.get = mock.Mock() redis_mock = mock.Mock() redis_mock.get.return_value = self.test_dataset self.assertEqual(DatasetType(redis_mock) @@ -89,10 +87,9 @@ class TestDataSetTypes(unittest.TestCase): '"B139_K_1206_M": "ProbeSet", ' '"B139_K_1206_R": "ProbeSet", ' '"Test": "ProbeSet"}')) - - db_mock.db.execute.assert_called_with( - ("SELECT ProbeSetFreeze.Id FROM ProbeSetFreeze " + - "WHERE ProbeSetFreeze.Name = \"Test\" ") + db_mock.db.execute.assert_called_once_with( + ("SELECT ProbeSetFreeze.Id FROM ProbeSetFreeze " + + "WHERE ProbeSetFreeze.Name = \"Test\" ") ) @mock.patch('base.data_set.g') @@ -148,9 +145,9 @@ class TestDataSetTypes(unittest.TestCase): '"Test": "Publish"}')) db_mock.db.execute.assert_called_with( - ("SELECT PublishFreeze.Name " + - "FROM PublishFreeze, InbredSet " + - "WHERE InbredSet.Name = 'Test' AND " + ("SELECT PublishFreeze.Name " + + "FROM PublishFreeze, InbredSet " + + "WHERE InbredSet.Name = 'Test' AND " "PublishFreeze.InbredSetId = InbredSet.Id") ) diff --git a/wqflask/tests/unit/base/test_species.py b/wqflask/tests/unit/base/test_species.py new file mode 100644 index 00000000..9b5c023c --- /dev/null +++ b/wqflask/tests/unit/base/test_species.py @@ -0,0 +1,116 @@ +"""Tests wqflask/base/species.py""" + +import unittest +from unittest import mock +from base.species import TheSpecies +from base.species import IndChromosome +from base.species import Chromosomes +from collections import OrderedDict +from wqflask import app +from dataclasses import dataclass + + +@dataclass +class MockChromosome: + OrderId: int + Name: str + Length: int + + +@dataclass +class MockGroup: + name: str + + +@dataclass +class MockDataset: + group: MockGroup + + +class TestTheSpecies(unittest.TestCase): + """Tests for TheSpecies class""" + @mock.patch('base.species.Chromosomes') + def test_create_species_with_null_species_name(self, mock_chromosome): + """Test that TheSpecies is instantiated correctly when the +species_name is provided.""" + mock_chromosome.return_value = 1 + test_species = TheSpecies(dataset="random_dataset", species_name="a") + self.assertEqual(test_species.name, "a") + self.assertEqual(test_species.chromosomes, 1) + + @mock.patch('base.species.Chromosomes') + def test_create_species_with_species_name(self, mock_chromosome): + """Test that TheSpecies is instantiated correctly when the +species_name is not provided.""" + mock_chromosome.return_value = 1 + test_species = TheSpecies(dataset="random_dataset") + self.assertEqual(test_species.dataset, "random_dataset") + self.assertEqual(test_species.chromosomes, 1) + mock_chromosome.assert_called_once_with(dataset="random_dataset") + + +class TestIndChromosome(unittest.TestCase): + """Tests for IndChromosome class""" + + def test_create_ind_chromosome(self): + """Test that IndChromosome is instantiated correctly""" + test_ind_chromosome = IndChromosome(name="Test", length=10000000) + self.assertEqual(test_ind_chromosome.name, "Test") + self.assertEqual(test_ind_chromosome.length, 10000000) + self.assertEqual(test_ind_chromosome.mb_length, 10) + + +class TestChromosomes(unittest.TestCase): + """Tests for Chromosomes class""" + maxDiff = None + + def setUp(self): + self.app_context = app.app_context() + self.app_context.push() + + def tearDown(self): + self.app_context.pop() + + @mock.patch("base.species.g") + def test_create_chromosomes_with_no_species(self, mock_db): + """Test instantiating a chromosome without a species""" + mock_db.db.execute.return_value.fetchall.return_value = [ + MockChromosome(1, "X", 100), + MockChromosome(2, "Y", 1000), + MockChromosome(3, "Z", 10000), + ] + mock_dataset = MockDataset(MockGroup("Random")) + test_chromosomes = Chromosomes(dataset=mock_dataset) + self.assertEqual( + list(test_chromosomes.chromosomes.keys()), + [1, 2, 3] + ) + self.assertEqual(test_chromosomes.dataset, mock_dataset) + mock_db.db.execute.assert_called_with( + "SELECT Chr_Length.Name, Chr_Length.OrderId, Length " + "FROM Chr_Length, InbredSet WHERE " + "Chr_Length.SpeciesId = InbredSet.SpeciesId AND " + "InbredSet.Name = 'Random' ORDER BY OrderId" + ) + + @mock.patch("base.species.g") + def test_create_chromosomes_with_species(self, mock_db): + """Test instantiating a chromosome with a species""" + mock_db.db.execute.return_value.fetchall.return_value = [ + MockChromosome(1, "X", 100), + MockChromosome(2, "Y", 1000), + MockChromosome(3, "Z", 10000), + ] + mock_dataset = MockDataset(MockGroup("Random")) + test_chromosomes = Chromosomes(dataset=mock_dataset, + species="testSpecies") + self.assertEqual( + list(test_chromosomes.chromosomes.keys()), + [1, 2, 3] + ) + mock_db.db.execute.assert_called_with( + "SELECT Chr_Length.Name, Chr_Length.OrderId, Length " + "FROM Chr_Length, Species WHERE " + "Chr_Length.SpeciesId = Species.SpeciesId AND " + "Species.Name = 'Testspecies' ORDER BY OrderId" + ) diff --git a/wqflask/tests/unit/base/test_webqtl_case_data.py b/wqflask/tests/unit/base/test_webqtl_case_data.py index 8e8ba482..e1555cb4 100644 --- a/wqflask/tests/unit/base/test_webqtl_case_data.py +++ b/wqflask/tests/unit/base/test_webqtl_case_data.py @@ -4,15 +4,16 @@ import unittest from wqflask import app # Required because of utility.tools in webqtlCaseData.py from base.webqtlCaseData import webqtlCaseData + class TestWebqtlCaseData(unittest.TestCase): """Tests for WebqtlCaseData class""" def setUp(self): self.w = webqtlCaseData(name="Test", - value=0, - variance=0.0, - num_cases=10, - name2="Test2") + value=0, + variance=0.0, + num_cases=10, + name2="Test2") def test_webqtl_case_data_repr(self): self.assertEqual( diff --git a/wqflask/tests/unit/utility/test_authentication_tools.py b/wqflask/tests/unit/utility/test_authentication_tools.py index 42dcae88..024ab43f 100644 --- a/wqflask/tests/unit/utility/test_authentication_tools.py +++ b/wqflask/tests/unit/utility/test_authentication_tools.py @@ -5,6 +5,7 @@ from unittest import mock from utility.authentication_tools import check_resource_availability from utility.authentication_tools import add_new_resource + class TestResponse: """Mock Test Response after a request""" @property diff --git a/wqflask/tests/unit/utility/test_chunks.py b/wqflask/tests/unit/utility/test_chunks.py index 8d90a1ec..1d349193 100644 --- a/wqflask/tests/unit/utility/test_chunks.py +++ b/wqflask/tests/unit/utility/test_chunks.py @@ -7,6 +7,7 @@ from utility.chunks import divide_into_chunks class TestChunks(unittest.TestCase): "Test Utility method for chunking" + def test_divide_into_chunks(self): "Check that a list is chunked correctly" self.assertEqual(divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3), diff --git a/wqflask/tests/unit/wqflask/api/test_correlation.py b/wqflask/tests/unit/wqflask/api/test_correlation.py index d0264b87..1089a36f 100644 --- a/wqflask/tests/unit/wqflask/api/test_correlation.py +++ b/wqflask/tests/unit/wqflask/api/test_correlation.py @@ -105,10 +105,10 @@ class TestCorrelations(unittest.TestCase): target_dataset = AttributeSetter({"group": group}) target_vals = [3.4, 6.2, 4.1, 3.4, 1.2, 5.6] - trait_data = {"S1": AttributeSetter({"value": 2.3}), "S2": AttributeSetter({"value": 1.1}), - "S3": AttributeSetter( - {"value": 6.3}), "S4": AttributeSetter({"value": 3.6}), "S5": AttributeSetter({"value": 4.1}), - "S6": AttributeSetter({"value": 5.0})} + trait_data = {"S1": AttributeSetter({"value": 2.3}), "S2": AttributeSetter({"value": 1.1}), + "S3": AttributeSetter( + {"value": 6.3}), "S4": AttributeSetter({"value": 3.6}), "S5": AttributeSetter({"value": 4.1}), + "S6": AttributeSetter({"value": 5.0})} this_trait = AttributeSetter({"data": trait_data}) mock_normalize.return_value = ([2.3, 1.1, 6.3, 3.6, 4.1, 5.0], [3.4, 6.2, 4.1, 3.4, 1.2, 5.6], 6) @@ -127,9 +127,9 @@ class TestCorrelations(unittest.TestCase): expected_pearsonr = [-0.21618688834430866, 0.680771605997119, 6] expected_spearmanr = [-0.11595420713048969, 0.826848213385815, 6] for i, val in enumerate(expected_pearsonr): - self.assertAlmostEqual(val, results_pearsonr[i],4) + self.assertAlmostEqual(val, results_pearsonr[i], 4) for i, val in enumerate(expected_spearmanr): - self.assertAlmostEqual(val, results_spearmanr[i],4) + self.assertAlmostEqual(val, results_spearmanr[i], 4) self.assertEqual(results_num_overlap, None) @mock.patch("wqflask.api.correlation.do_literature_correlation_for_all_traits") diff --git a/wqflask/tests/unit/wqflask/api/test_gen_menu.py b/wqflask/tests/unit/wqflask/api/test_gen_menu.py index 57eb1650..7e477da2 100644 --- a/wqflask/tests/unit/wqflask/api/test_gen_menu.py +++ b/wqflask/tests/unit/wqflask/api/test_gen_menu.py @@ -2,9 +2,7 @@ import unittest from unittest import mock -from wqflask import app from wqflask.api.gen_menu import gen_dropdown_json -from wqflask.api.gen_menu import get_species from wqflask.api.gen_menu import get_groups from wqflask.api.gen_menu import get_types from wqflask.api.gen_menu import get_datasets @@ -18,8 +16,6 @@ class TestGenMenu(unittest.TestCase): """Tests for the gen_menu module""" def setUp(self): - self.app_context = app.app_context() - self.app_context.push() self.test_group = { 'mouse': [ ['H_T1', @@ -70,212 +66,225 @@ class TestGenMenu(unittest.TestCase): } } - def tearDown(self): - self.app_context.pop() - - @mock.patch('wqflask.api.gen_menu.g') - def test_get_species(self, db_mock): - """Test that assertion is raised when dataset and dataset_name - are defined""" - db_mock.db.execute.return_value.fetchall.return_value = ( - ('human', 'Human'), - ('mouse', 'Mouse')) - self.assertEqual(get_species(), - [['human', 'Human'], ['mouse', 'Mouse']]) - db_mock.db.execute.assert_called_once_with( - "SELECT Name, MenuName FROM Species ORDER BY OrderId" - ) - - @mock.patch('wqflask.api.gen_menu.g') - def test_get_groups(self, db_mock): + def test_get_groups(self): """Test that species groups are grouped correctly""" - db_mock.db.execute.return_value.fetchall.side_effect = [ - # Mouse - (('BXD', 'BXD', None), - ('HLC', 'Liver: Normal Gene Expression with Genotypes (Merck)', - 'Test')), - # Human - (('H_T1', "H_T", "DescriptionA"), - ('H_T2', "H_T'", None)) - ] - - self.assertEqual(get_groups([["human", "Human"], ["mouse", "Mouse"]]), - self.test_group) - - for name in ["mouse", "human"]: - db_mock.db.execute.assert_any_call( - ("SELECT InbredSet.Name, InbredSet.FullName, " + - "IFNULL(InbredSet.Family, 'None') " + - "FROM InbredSet, Species WHERE Species.Name " + - "= '{}' AND InbredSet.SpeciesId = Species.Id GROUP by " + - "InbredSet.Name ORDER BY IFNULL(InbredSet.FamilyOrder, " + - "InbredSet.FullName) ASC, IFNULL(InbredSet.Family, " + - "InbredSet.FullName) ASC, InbredSet.FullName ASC, " + - "InbredSet.MenuOrderId ASC").format(name) + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchall.side_effect = [ + # Mouse + (('BXD', 'BXD', None), + ('HLC', ('Liver: Normal Gene Expression ' + 'with Genotypes (Merck)'), + 'Test')), + # Human + (('H_T1', "H_T", "DescriptionA"), + ('H_T2', "H_T'", None)) + ] + self.assertEqual(get_groups([["human", "Human"], + ["mouse", "Mouse"]], + db_mock), + self.test_group) + + for name in ["mouse", "human"]: + cursor.execute.assert_any_call( + ("SELECT InbredSet.Name, InbredSet.FullName, " + "IFNULL(InbredSet.Family, 'None') " + "FROM InbredSet, Species WHERE Species.Name " + "= '{}' AND InbredSet.SpeciesId = Species.Id GROUP by " + "InbredSet.Name ORDER BY IFNULL(InbredSet.FamilyOrder, " + "InbredSet.FullName) ASC, IFNULL(InbredSet.Family, " + "InbredSet.FullName) ASC, InbredSet.FullName ASC, " + "InbredSet.MenuOrderId ASC").format(name) + ) + + def test_phenotypes_exist_called_with_correct_query(self): + """Test that phenotypes_exist is called with the correct query""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = None + phenotypes_exist("test", db_mock) + cursor.execute.assert_called_with( + "SELECT Name FROM PublishFreeze " + "WHERE PublishFreeze.Name = 'testPublish'" ) - @mock.patch('wqflask.api.gen_menu.g') - def test_phenotypes_exist_called_with_correct_query(self, db_mock): - """Test that phenotypes_exist is called with the correct query""" - db_mock.db.execute.return_value.fetchone.return_value = None - phenotypes_exist("test") - db_mock.db.execute.assert_called_with( - "SELECT Name FROM PublishFreeze " - "WHERE PublishFreeze.Name = 'testPublish'" - ) - - @mock.patch('wqflask.api.gen_menu.g') - def test_phenotypes_exist_with_falsy_values(self, db_mock): + def test_phenotypes_exist_with_falsy_values(self): """Test that phenotype check returns correctly when given a None value""" - for x in [None, False, (), [], ""]: - db_mock.db.execute.return_value.fetchone.return_value = x - self.assertFalse(phenotypes_exist("test")) - - @mock.patch('wqflask.api.gen_menu.g') - def test_phenotypes_exist_with_truthy_value(self, db_mock): - """Test that phenotype check returns correctly when given Truthy """ - for x in ["x", ("result"), ["result"], [1]]: - db_mock.db.execute.return_value.fetchone.return_value = (x) - self.assertTrue(phenotypes_exist("test")) - - @mock.patch('wqflask.api.gen_menu.g') - def test_genotypes_exist_called_with_correct_query(self, db_mock): + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + for x in [None, False, (), [], ""]: + cursor.fetchone.return_value = x + self.assertFalse(phenotypes_exist("test", db_mock)) + + def test_phenotypes_exist_with_truthy_value(self): + """Test that phenotype check returns correctly when given Truthy""" + db_mock = mock.MagicMock() + with db_mock.cursor() as conn: + with conn.cursor() as cursor: + for x in ["x", ("result"), ["result"], [1]]: + cursor.fetchone.return_value = (x) + self.assertTrue(phenotypes_exist("test", db_mock)) + + def test_genotypes_exist_called_with_correct_query(self): """Test that genotypes_exist is called with the correct query""" - db_mock.db.execute.return_value.fetchone.return_value = None - genotypes_exist("test") - db_mock.db.execute.assert_called_with( - "SELECT Name FROM GenoFreeze WHERE GenoFreeze.Name = 'testGeno'" - ) - - @mock.patch('wqflask.api.gen_menu.g') - def test_genotypes_exist_with_falsy_values(self, db_mock): - """Test that genotype check returns correctly when given - a None value""" - for x in [None, False, (), [], ""]: - db_mock.db.execute.return_value.fetchone.return_value = x - self.assertFalse(genotypes_exist("test")) + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = None + genotypes_exist("test", db_mock) + cursor.execute.assert_called_with( + "SELECT Name FROM GenoFreeze WHERE " + "GenoFreeze.Name = 'testGeno'" + ) + + def test_genotypes_exist_with_falsy_values(self): + """Test that genotype check returns correctly when given a None value - @mock.patch('wqflask.api.gen_menu.g') - def test_genotypes_exist_with_truthy_value(self, db_mock): + """ + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + for x in [None, False, (), [], ""]: + cursor.fetchone.return_value = x + self.assertFalse(genotypes_exist("test", db_mock)) + + def test_genotypes_exist_with_truthy_value(self): """Test that genotype check returns correctly when given Truthy """ - for x in ["x", ("result"), ["result"], [1]]: - db_mock.db.execute.return_value.fetchone.return_value = (x) - self.assertTrue(phenotypes_exist("test")) + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + for x in ["x", ("result"), ["result"], [1]]: + cursor.fetchone.return_value = (x) + self.assertTrue(phenotypes_exist("test", db_mock)) - @mock.patch('wqflask.api.gen_menu.g') - def test_build_datasets_with_type_phenotypes(self, db_mock): + def test_build_datasets_with_type_phenotypes(self): """Test that correct dataset is returned for a phenotype type""" - db_mock.db.execute.return_value.fetchall.return_value = ( - (602, "BXDPublish", "BXD Published Phenotypes"), - ) - self.assertEqual(build_datasets("Mouse", "BXD", "Phenotypes"), - [['602', "BXDPublish", "BXD Published Phenotypes"]]) - db_mock.db.execute.assert_called_with( - "SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " + - "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " + - "InbredSet WHERE InbredSet.Name = 'BXD' AND " + - "PublishFreeze.InbredSetId = InbredSet.Id AND " + - "InfoFiles.InfoPageName = PublishFreeze.Name " + - "ORDER BY PublishFreeze.CreateTime ASC" - ) - self.assertEqual(build_datasets("Mouse", "MDP", "Phenotypes"), - [['602', "BXDPublish", "Mouse Phenome Database"]]) - - db_mock.db.execute.return_value.fetchall.return_value = () - db_mock.db.execute.return_value.fetchone.return_value = ( - "BXDPublish", "Mouse Phenome Database" - ) - self.assertEqual(build_datasets("Mouse", "MDP", "Phenotypes"), - [["None", "BXDPublish", "Mouse Phenome Database"]]) - - @mock.patch('wqflask.api.gen_menu.g') - def test_build_datasets_with_type_phenotypes_and_no_results(self, db_mock): + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchall.return_value = ( + (602, "BXDPublish", "BXD Published Phenotypes"), + ) + self.assertEqual(build_datasets("Mouse", "BXD", + "Phenotypes", db_mock), + [['602', "BXDPublish", + "BXD Published Phenotypes"]]) + cursor.execute.assert_called_with( + "SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " + + "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " + + "InbredSet WHERE InbredSet.Name = 'BXD' AND " + + "PublishFreeze.InbredSetId = InbredSet.Id AND " + + "InfoFiles.InfoPageName = PublishFreeze.Name " + + "ORDER BY PublishFreeze.CreateTime ASC" + ) + self.assertEqual(build_datasets("Mouse", "MDP", + "Phenotypes", db_mock), + [['602', "BXDPublish", + "Mouse Phenome Database"]]) + + cursor.fetchall.return_value = () + cursor.fetchone.return_value = ( + "BXDPublish", "Mouse Phenome Database" + ) + self.assertEqual(build_datasets("Mouse", "MDP", + "Phenotypes", db_mock), + [["None", "BXDPublish", + "Mouse Phenome Database"]]) + + def test_build_datasets_with_type_phenotypes_and_no_results(self): """Test that correct dataset is returned for a phenotype type with no results """ - db_mock.db.execute.return_value.fetchall.return_value = None - db_mock.db.execute.return_value.fetchone.return_value = (121, - "text value") - self.assertEqual(build_datasets("Mouse", "BXD", "Phenotypes"), - [["None", "121", "text value"]]) - db_mock.db.execute.assert_called_with( - "SELECT PublishFreeze.Name, PublishFreeze.FullName " - "FROM PublishFreeze, InbredSet " - "WHERE InbredSet.Name = 'BXD' AND " - "PublishFreeze.InbredSetId = InbredSet.Id " - "ORDER BY PublishFreeze.CreateTime ASC" - ) - - @mock.patch('wqflask.api.gen_menu.g') - def test_build_datasets_with_type_genotypes(self, db_mock): + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchall.return_value = None + cursor.fetchone.return_value = (121, + "text value") + self.assertEqual(build_datasets("Mouse", "BXD", + "Phenotypes", db_mock), + [["None", "121", + "text value"]]) + cursor.execute.assert_called_with( + "SELECT PublishFreeze.Name, PublishFreeze.FullName " + "FROM PublishFreeze, InbredSet " + "WHERE InbredSet.Name = 'BXD' AND " + "PublishFreeze.InbredSetId = InbredSet.Id " + "ORDER BY PublishFreeze.CreateTime ASC" + ) + + def test_build_datasets_with_type_genotypes(self): """Test that correct dataset is returned for a phenotype type""" - db_mock.db.execute.return_value.fetchone.return_value = ( - 635, "HLCPublish", "HLC Published Genotypes" - ) - - self.assertEqual(build_datasets("Mouse", "HLC", "Genotypes"), - [["635", "HLCGeno", "HLC Genotypes"]]) - db_mock.db.execute.assert_called_with( - "SELECT InfoFiles.GN_AccesionId FROM InfoFiles, " - "GenoFreeze, InbredSet WHERE InbredSet.Name = 'HLC' AND " - "GenoFreeze.InbredSetId = InbredSet.Id AND " - "InfoFiles.InfoPageName = GenoFreeze.ShortName " + - "ORDER BY GenoFreeze.CreateTime DESC" - ) - db_mock.db.execute.return_value.fetchone.return_value = () - self.assertEqual(build_datasets("Mouse", "HLC", "Genotypes"), - [["None", "HLCGeno", "HLC Genotypes"]]) - - @mock.patch('wqflask.api.gen_menu.g') - def test_build_datasets_with_type_mrna(self, db_mock): + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + 635, "HLCPublish", "HLC Published Genotypes" + ) + self.assertEqual(build_datasets("Mouse", "HLC", + "Genotypes", db_mock), + [["635", "HLCGeno", "HLC Genotypes"]]) + cursor.execute.assert_called_with( + "SELECT InfoFiles.GN_AccesionId FROM InfoFiles, " + "GenoFreeze, InbredSet WHERE InbredSet.Name = 'HLC' AND " + "GenoFreeze.InbredSetId = InbredSet.Id AND " + "InfoFiles.InfoPageName = GenoFreeze.ShortName " + "ORDER BY GenoFreeze.CreateTime DESC" + ) + cursor.fetchone.return_value = () + self.assertEqual(build_datasets("Mouse", "HLC", + "Genotypes", db_mock), + [["None", "HLCGeno", "HLC Genotypes"]]) + + def test_build_datasets_with_type_mrna(self): """Test that correct dataset is returned for a mRNA expression/ Probeset""" - db_mock.db.execute.return_value.fetchall.return_value = ( - (112, "HC_M2_0606_P", - "Hippocampus Consortium M430v2 (Jun06) PDNN"), ) - self.assertEqual(build_datasets("Mouse", "HLC", "mRNA"), [[ - "112", 'HC_M2_0606_P', "Hippocampus Consortium M430v2 (Jun06) PDNN" - ]]) - db_mock.db.execute.assert_called_once_with( - "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + - "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " + - "ProbeFreeze, InbredSet, Tissue, Species WHERE " + - "Species.Name = 'Mouse' AND Species.Id = " + - "InbredSet.SpeciesId AND InbredSet.Name = 'HLC' AND " + - "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND " + - "Tissue.Name = 'mRNA' AND ProbeFreeze.TissueId = " + - "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND " + - "ProbeSetFreeze.public > 0 " + - "ORDER BY -ProbeSetFreeze.OrderList DESC, ProbeSetFreeze.CreateTime DESC") + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchall.return_value = ( + (112, "HC_M2_0606_P", + "Hippocampus Consortium M430v2 (Jun06) PDNN"), ) + self.assertEqual(build_datasets("Mouse", + "HLC", "mRNA", db_mock), + [["112", 'HC_M2_0606_P', + "Hippocampus Consortium M430v2 (Jun06) PDNN" + ]]) + cursor.execute.assert_called_once_with( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " + "ProbeFreeze, InbredSet, Tissue, Species WHERE " + "Species.Name = 'Mouse' AND Species.Id = " + "InbredSet.SpeciesId AND InbredSet.Name = 'HLC' AND " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND " + "Tissue.Name = 'mRNA' AND ProbeFreeze.TissueId = " + "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND " + "ProbeSetFreeze.public > 0 " + "ORDER BY -ProbeSetFreeze.OrderList DESC, " + "ProbeSetFreeze.CreateTime DESC") @mock.patch('wqflask.api.gen_menu.build_datasets') - @mock.patch('wqflask.api.gen_menu.g') - def test_build_types(self, db_mock, datasets_mock): + def test_build_types(self, datasets_mock): """Test that correct tissue metadata is returned""" + db_mock = mock.MagicMock() datasets_mock.return_value = [ ["112", 'HC_M2_0606_P', "Hippocampus Consortium M430v2 (Jun06) PDNN"] ] - db_mock.db.execute.return_value.fetchall.return_value = ( - ('Mouse Tissue'), ('Human Tissue'), ('Rat Tissue') - ) - self.assertEqual(build_types('mouse', 'random group'), - [['M', 'M', 'Molecular Traits'], - ['H', 'H', 'Molecular Traits'], - ['R', 'R', 'Molecular Traits']]) - db_mock.db.execute.assert_called_once_with( - "SELECT DISTINCT Tissue.Name " + - "FROM ProbeFreeze, ProbeSetFreeze, InbredSet, " + - "Tissue, Species WHERE Species.Name = 'mouse' " + - "AND Species.Id = InbredSet.SpeciesId AND " + - "InbredSet.Name = 'random group' AND " + - "ProbeFreeze.TissueId = Tissue.Id AND " + - "ProbeFreeze.InbredSetId = InbredSet.Id AND " + - "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + - "ORDER BY Tissue.Name" - ) + with db_mock.cursor() as cursor: + cursor.fetchall.return_value = ( + ('Mouse Tissue'), ('Human Tissue'), ('Rat Tissue') + ) + self.assertEqual(build_types('mouse', 'random group', db_mock), + [['M', 'M', 'Molecular Traits'], + ['H', 'H', 'Molecular Traits'], + ['R', 'R', 'Molecular Traits']]) + cursor.execute.assert_called_once_with( + "SELECT DISTINCT Tissue.Name " + "FROM ProbeFreeze, ProbeSetFreeze, InbredSet, " + "Tissue, Species WHERE Species.Name = 'mouse' " + "AND Species.Id = InbredSet.SpeciesId AND " + "InbredSet.Name = 'random group' AND " + "ProbeFreeze.TissueId = Tissue.Id AND " + "ProbeFreeze.InbredSetId = InbredSet.Id AND " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "ORDER BY Tissue.Name" + ) @mock.patch('wqflask.api.gen_menu.build_types') @mock.patch('wqflask.api.gen_menu.genotypes_exist') @@ -297,7 +306,9 @@ class TestGenMenu(unittest.TestCase): build_types_mock.return_value = [ ['M', 'M', 'Molecular Trait Datasets'] ] - self.assertEqual(get_types(self.test_group), expected_result) + self.assertEqual(get_types(self.test_group, + mock.MagicMock()), + expected_result) @mock.patch('wqflask.api.gen_menu.build_types') @mock.patch('wqflask.api.gen_menu.genotypes_exist') @@ -315,10 +326,8 @@ class TestGenMenu(unittest.TestCase): genotypes_exist_mock.return_value = False build_types_mock.return_value = [] - self.assertEqual(get_types(self.test_group), { - 'mouse': {}, - 'human': {} - }) + self.assertEqual(get_types(self.test_group, mock.MagicMock()), + {'mouse': {}, 'human': {}}) @mock.patch('wqflask.api.gen_menu.build_types') @mock.patch('wqflask.api.gen_menu.genotypes_exist') @@ -345,7 +354,7 @@ class TestGenMenu(unittest.TestCase): 'human': { 'HLC': [['M', 'M', 'Molecular Trait Datasets']], 'BXD': [['M', 'M', 'Molecular Trait Datasets']]}} - self.assertEqual(get_types(self.test_group), + self.assertEqual(get_types(self.test_group, mock.MagicMock()), expected_result) @mock.patch('wqflask.api.gen_menu.build_datasets') @@ -367,7 +376,7 @@ class TestGenMenu(unittest.TestCase): 'BXD': {'Genotypes': 'Test', 'M': 'Test', 'Phenotypes': 'Test'}}} - self.assertEqual(get_datasets(self.test_type), + self.assertEqual(get_datasets(self.test_type, mock.MagicMock()), expected_result) @mock.patch('wqflask.api.gen_menu.build_datasets') @@ -381,13 +390,13 @@ class TestGenMenu(unittest.TestCase): 'H_T1': {}}, 'human': {'HLC': {}, 'BXD': {}}} - self.assertEqual(get_datasets(self.test_type), + self.assertEqual(get_datasets(self.test_type, mock.MagicMock()), expected_result) @mock.patch('wqflask.api.gen_menu.get_datasets') @mock.patch('wqflask.api.gen_menu.get_types') @mock.patch('wqflask.api.gen_menu.get_groups') - @mock.patch('wqflask.api.gen_menu.get_species') + @mock.patch('wqflask.api.gen_menu.get_all_species') def test_gen_dropdown_json(self, species_mock, groups_mock, @@ -411,4 +420,4 @@ class TestGenMenu(unittest.TestCase): 'groups': ['groupA', 'groupB', 'groupC', 'groupD'], 'species': ['speciesA', 'speciesB', 'speciesC', 'speciesD']} - self.assertEqual(gen_dropdown_json(), expected_result) + self.assertEqual(gen_dropdown_json(mock.MagicMock()), expected_result) diff --git a/wqflask/tests/unit/wqflask/api/test_mapping.py b/wqflask/tests/unit/wqflask/api/test_mapping.py index b094294a..159c982b 100644 --- a/wqflask/tests/unit/wqflask/api/test_mapping.py +++ b/wqflask/tests/unit/wqflask/api/test_mapping.py @@ -58,7 +58,7 @@ class TestMapping(unittest.TestCase): self.assertEqual(results_2, expected_results) - @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl_geno") + @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl") @mock.patch("wqflask.api.mapping.gemma_mapping.run_gemma") @mock.patch("wqflask.api.mapping.initialize_parameters") @mock.patch("wqflask.api.mapping.retrieve_sample_data") diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py index 44d2e0fc..2bbeab1f 100644 --- a/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py +++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_functions.py @@ -5,7 +5,7 @@ from wqflask.correlation.correlation_functions import cal_zero_order_corr_for_ti class TestCorrelationFunctions(unittest.TestCase): - + @mock.patch("wqflask.correlation.correlation_functions.MrnaAssayTissueData") def test_get_trait_symbol_and_tissue_values(self, mock_class): """test for getting trait symbol and tissue_values""" diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py new file mode 100644 index 00000000..e1bd6d86 --- /dev/null +++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py @@ -0,0 +1,14 @@ +"""this module contains tests for code used in integrating to gn3 api""" +from unittest import TestCase +from base.data_set import create_dataset + +class TestCorrelation(TestCase): + + def test_create_dataset(self): + """test for creating datasets""" + + pass + def test_fetch_dataset_info(self): + """test for fetching dataset info data""" + + pass diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py index 8ae0f09f..f4869c45 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_display_mapping_results.py @@ -9,6 +9,7 @@ from wqflask.marker_regression.display_mapping_results import ( class TestDisplayMappingResults(unittest.TestCase): """Basic Methods to test Mapping Results""" + def test_pil_colors(self): """Test that colors use PILLOW color format""" self.assertEqual(DisplayMappingResults.CLICKABLE_WEBQTL_REGION_COLOR, @@ -17,6 +18,7 @@ class TestDisplayMappingResults(unittest.TestCase): class TestHtmlGenWrapper(unittest.TestCase): """Test Wrapper around HTMLGen""" + def test_create_image(self): """Test HT.Image method""" self.assertEqual( @@ -37,7 +39,8 @@ class TestHtmlGenWrapper(unittest.TestCase): cgi="/testing/", enctype='multipart/form-data', name="formName", - submit=HtmlGenWrapper.create_input_tag(type_='hidden', name='Default_Name') + submit=HtmlGenWrapper.create_input_tag( + type_='hidden', name='Default_Name') ) test_image = HtmlGenWrapper.create_image_tag( src="test.png", diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py index fe2569b8..4003d68f 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_gemma_mapping.py @@ -47,11 +47,11 @@ class TestGemmaMapping(unittest.TestCase): @mock.patch("wqflask.marker_regression.run_mapping.random.choice") @mock.patch("wqflask.marker_regression.gemma_mapping.os") @mock.patch("wqflask.marker_regression.gemma_mapping.gen_pheno_txt_file") - def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os, mock_choice, mock_gen_covar, mock_flat_files,mock_parse_loco): + def test_run_gemma_firstrun_set_true(self, mock_gen_pheno_txt, mock_os, mock_choice, mock_gen_covar, mock_flat_files, mock_parse_loco): """add tests for run_gemma where first run is set to true""" - this_chromosomes={} + this_chromosomes = {} for i in range(1, 5): - this_chromosomes[f'CH{i}']=(AttributeSetter({"name": f"CH{i}"})) + this_chromosomes[f'CH{i}'] = (AttributeSetter({"name": f"CH{i}"})) chromosomes = AttributeSetter({"chromosomes": this_chromosomes}) dataset_group = MockGroup( @@ -68,9 +68,10 @@ class TestGemmaMapping(unittest.TestCase): mock_parse_loco.return_value = [] results = run_gemma(this_trait=trait, this_dataset=dataset, samples=[ ], vals=[], covariates="", use_loco=True) - self.assertEqual(mock_os.system.call_count,2) + self.assertEqual(mock_os.system.call_count, 2) mock_gen_pheno_txt.assert_called_once() - mock_parse_loco.assert_called_once_with(dataset, "GP1_GWA_RRRRRR",True) + mock_parse_loco.assert_called_once_with( + dataset, "GP1_GWA_RRRRRR", True) mock_os.path.isfile.assert_called_once_with( ('/home/user/imgfile_output.assoc.txt')) self.assertEqual(mock_flat_files.call_count, 4) @@ -102,7 +103,8 @@ class TestGemmaMapping(unittest.TestCase): create_trait_side_effect = [] for i in range(4): - create_dataset_side_effect.append(AttributeSetter({"name": f'name_{i}'})) + create_dataset_side_effect.append( + AttributeSetter({"name": f'name_{i}'})) create_trait_side_effect.append( AttributeSetter({"data": [f'data_{i}']})) @@ -144,7 +146,7 @@ class TestGemmaMapping(unittest.TestCase): "files": [["file_name", "user", "~/file1"], ["file_name", "user", "~/file2"]] } - return_file="""X/Y\tM1\t28.457155\tQ\tE\tA\tMMB\t23.3\tW\t0.9\t0.85\t + return_file = """X/Y\tM1\t28.457155\tQ\tE\tA\tMMB\t23.3\tW\t0.9\t0.85\t chr4\tM2\t12\tQ\tE\tMMB\tR\t24\tW\t0.87\t0.5 Y\tM4\t12\tQ\tE\tMMB\tR\t11.6\tW\t0.21\t0.7 X\tM5\t12\tQ\tE\tMMB\tR\t21.1\tW\t0.65\t0.6""" @@ -159,11 +161,14 @@ X\tM5\t12\tQ\tE\tMMB\tR\t21.1\tW\t0.65\t0.6""" mock_open.side_effect = handles results = parse_loco_output( this_dataset={}, gwa_output_filename=".xw/") - expected_results= [ - {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, 'additive': 23.3, 'lod_score': 0.07058107428570727}, - {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, 'additive': 24.0, 'lod_score': 0.3010299956639812}, - {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, 'additive': 11.6, 'lod_score': 0.1549019599857432}, - {'name': 'M5', 'chr': 'X', 'Mb': 1.2e-05, 'p_value': 0.6, 'additive': 21.1, 'lod_score': 0.22184874961635637}] + expected_results = [ + {'name': 'M1', 'chr': 'X/Y', 'Mb': 2.8457155e-05, 'p_value': 0.85, + 'additive': 23.3, 'lod_score': 0.07058107428570727}, + {'name': 'M2', 'chr': 4, 'Mb': 1.2e-05, 'p_value': 0.5, + 'additive': 24.0, 'lod_score': 0.3010299956639812}, + {'name': 'M4', 'chr': 'Y', 'Mb': 1.2e-05, 'p_value': 0.7, + 'additive': 11.6, 'lod_score': 0.1549019599857432}, + {'name': 'M5', 'chr': 'X', 'Mb': 1.2e-05, 'p_value': 0.6, 'additive': 21.1, 'lod_score': 0.22184874961635637}] self.assertEqual(expected_results, results) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py index 5eec93f1..fd21a825 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_plink_mapping.py @@ -12,9 +12,10 @@ class AttributeSetter: def __init__(self, obj): for key, val in obj.items(): setattr(self, key, val) -class TestPlinkMapping(unittest.TestCase): +class TestPlinkMapping(unittest.TestCase): + def test_build_line_list(self): """test for building line list""" line_1 = "this is line one test" diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py index b47f877a..8b4337ec 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_qtlreaper_mapping.py @@ -1,21 +1,24 @@ import unittest -from unittest import mock -from wqflask.marker_regression.qtlreaper_mapping import gen_pheno_txt_file +from unittest import mock +from wqflask.marker_regression.qtlreaper_mapping import gen_pheno_txt_file + +# issues some methods in genofile object are not defined +# modify samples should equal to vals -#issues some methods in genofile object are not defined -#modify samples should equal to vals -class TestQtlReaperMapping(unittest.TestCase): - @mock.patch("wqflask.marker_regression.qtlreaper_mapping.TEMPDIR", "/home/user/data") - def test_gen_pheno_txt_file(self): - vals=["V1","x","V4","V3","x"] - samples=["S1","S2","S3","S4","S5"] - trait_filename="trait_file" - with mock.patch("builtins.open", mock.mock_open())as mock_open: - gen_pheno_txt_file(samples=samples,vals=vals,trait_filename=trait_filename) - mock_open.assert_called_once_with("/home/user/data/gn2/trait_file.txt","w") - filehandler=mock_open() - write_calls= [mock.call('Trait\t'),mock.call('S1\tS3\tS4\n'),mock.call('T1\t'),mock.call('V1\tV4\tV3')] - filehandler.write.assert_has_calls(write_calls) +class TestQtlReaperMapping(unittest.TestCase): + @mock.patch("wqflask.marker_regression.qtlreaper_mapping.TEMPDIR", "/home/user/data") + def test_gen_pheno_txt_file(self): + vals = ["V1", "x", "V4", "V3", "x"] + samples = ["S1", "S2", "S3", "S4", "S5"] + trait_filename = "trait_file" + with mock.patch("builtins.open", mock.mock_open())as mock_open: + gen_pheno_txt_file(samples=samples, vals=vals, + trait_filename=trait_filename) + mock_open.assert_called_once_with( + "/home/user/data/gn2/trait_file.txt", "w") + filehandler = mock_open() + write_calls = [mock.call('Trait\t'), mock.call( + 'S1\tS3\tS4\n'), mock.call('T1\t'), mock.call('V1\tV4\tV3')] - + filehandler.write.assert_has_calls(write_calls) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index c585f1df..9d13e943 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -1,48 +1,43 @@ import unittest from unittest import mock -from wqflask import app -from wqflask.marker_regression.rqtl_mapping import get_trait_data_type -from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_phenotype -from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_names +from dataclasses import dataclass -class TestRqtlMapping(unittest.TestCase): - - def setUp(self): - self.app_context=app.app_context() - self.app_context.push() - - def tearDown(self): - self.app_context.pop() - - - @mock.patch("wqflask.marker_regression.rqtl_mapping.g") - @mock.patch("wqflask.marker_regression.rqtl_mapping.logger") - def test_get_trait_data(self,mock_logger,mock_db): - """test for getting trait data_type return True""" - query_value="""SELECT value FROM TraitMetadata WHERE type='trait_data_type'""" - mock_db.db.execute.return_value.fetchone.return_value=["""{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] - results=get_trait_data_type("traid_id") - mock_db.db.execute.assert_called_with(query_value) - self.assertEqual(results,"fer434f") - - def test_sanitize_rqtl_phenotype(self): - """test for sanitizing rqtl phenotype""" - vals=['f',"x","r","x","x"] - results=sanitize_rqtl_phenotype(vals) - expected_phenotype_string='c(f,NA,r,NA,NA)' - - self.assertEqual(results,expected_phenotype_string) - - def test_sanitize_rqtl_names(self): - """test for sanitzing rqtl names""" - vals=['f',"x","r","x","x"] - expected_sanitized_name="c('f',NA,'r',NA,NA)" - results=sanitize_rqtl_names(vals) - self.assertEqual(expected_sanitized_name,results) - - - - +from wqflask.marker_regression.rqtl_mapping import run_rqtl +@dataclass +class MockGroup: + name: str + genofile: str +@dataclass +class MockDataset: + group: MockGroup +class TestRqtlMapping(unittest.TestCase): + """Tests for functions in rqtl_mapping.py""" + @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post") + @mock.patch("wqflask.marker_regression.rqtl_mapping.locate") + @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file") + def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post): + """Test for run_rqtl with permutations > 0""" + dataset_group = MockGroup("GP1", "file_geno") + dataset = MockDataset(dataset_group) + + mock_write_pheno_file.return_value = "pheno_filename" + mock_locate.return_value = "geno_filename" + mock_post.return_value = mock.Mock(ok=True) + mock_post.return_value.json.return_value = {"perm_results": [], + "suggestive": 3, + "significant": 4, + "results" : []} + + results = run_rqtl(trait_name="the_trait", vals=[], samples=[], + dataset=dataset, mapping_scale="cM", model="normal", method="hk", + num_perm=5, perm_strata_list=[], do_control="false", control_marker="", + manhattan_plot=True, cofactors="") + + mock_write_pheno_file.assert_called_once() + mock_locate.assert_called_once() + mock_post.assert_called_once() + + self.assertEqual(results, ([], 3, 4, [])) diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index a29d8cfb..c220a072 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -187,7 +187,8 @@ class TestRunMapping(unittest.TestCase): mock.call('Time/Date: 09/01/19 / 10:12:12\n'), mock.call('Population: Human GP1_\n'), mock.call( 'Data Set: dataser_1\n'), - mock.call('N Samples: 100\n'), mock.call('Transform - Quantile Normalized\n'), + mock.call('N Samples: 100\n'), mock.call( + 'Transform - Quantile Normalized\n'), mock.call('Gene Symbol: IGFI\n'), mock.call( 'Location: X1 @ 123313 Mb\n'), mock.call('Cofactors (dataset - trait):\n'), @@ -228,20 +229,20 @@ class TestRunMapping(unittest.TestCase): used_samples = ["S1", "S2"] sample_list = AttributeSetter({"sample_attribute_values": { "S1": { - "C1": "c1_value", - "C2": "c2_value", - "W1": "w1_value" + "c1": "c1_value", + "c2": "c2_value", + "w1": "w1_value" }, "S2": { - "W1": "w2_value", - "W2": "w2_value" + "w1": "w2_value", + "w2": "w2_value" }, "S3": { - "C1": "c1_value", - "C2": "c2_value" + "c1": "c1_value", + "c2": "c2_value" }, diff --git a/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py b/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py index ce3e7b83..89442c47 100644 --- a/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py +++ b/wqflask/tests/unit/wqflask/snp_browser/test_snp_browser.py @@ -21,11 +21,11 @@ class TestSnpBrowser(unittest.TestCase): "transcript": "false", "exon": "false", "domain_2": "true", "function": "false", "function_details": "true"} strains = {"mouse": ["S1", "S2", "S3", "S4", "S5"], "rat": []} expected_results = ([['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'ConScore', - 'Domain 1', 'Domain 2', 'Details'], - ['S1', 'S2', 'S3', 'S4', 'S5']], 5, - ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', - 'conservation_score', 'domain_1', 'domain_2', - 'function_details', 'S1', 'S2', 'S3', 'S4', 'S5']) + 'Domain 1', 'Domain 2', 'Details'], + ['S1', 'S2', 'S3', 'S4', 'S5']], 5, + ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', + 'conservation_score', 'domain_1', 'domain_2', + 'function_details', 'S1', 'S2', 'S3', 'S4', 'S5']) results_with_snp = get_header_list( variant_type="SNP", strains=strains, species="Mouse", empty_columns=empty_columns) @@ -33,9 +33,9 @@ class TestSnpBrowser(unittest.TestCase): variant_type="InDel", strains=strains, species="rat", empty_columns=[]) expected_results_with_indel = ( ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', - 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'], 0, - ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', - 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name']) + 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'], 0, + ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name']) self.assertEqual(expected_results, results_with_snp) self.assertEqual(expected_results_with_indel, results_with_indel) diff --git a/wqflask/tests/unit/wqflask/test_collect.py b/wqflask/tests/unit/wqflask/test_collect.py index 9a36132d..2a914fb2 100644 --- a/wqflask/tests/unit/wqflask/test_collect.py +++ b/wqflask/tests/unit/wqflask/test_collect.py @@ -11,6 +11,7 @@ app = Flask(__name__) class MockSession: """Helper class for mocking wqflask.collect.g.user_session.logged_in""" + def __init__(self, is_logged_in=False): self.is_logged_in = is_logged_in @@ -21,6 +22,7 @@ class MockSession: class MockFlaskG: """Helper class for mocking wqflask.collect.g.user_session""" + def __init__(self, is_logged_in=False): self.is_logged_in = is_logged_in diff --git a/wqflask/tests/unit/wqflask/test_server_side.py b/wqflask/tests/unit/wqflask/test_server_side.py index 4f91d8ca..be7ca2df 100644 --- a/wqflask/tests/unit/wqflask/test_server_side.py +++ b/wqflask/tests/unit/wqflask/test_server_side.py @@ -17,15 +17,18 @@ class TestServerSideTableTests(unittest.TestCase): def test_get_page(self): rows_count = 3 table_rows = [ - {'first': 'd', 'second': 4, 'third': 'zz'}, - {'first': 'b', 'second': 2, 'third': 'aa'}, + {'first': 'd', 'second': 4, 'third': 'zz'}, + {'first': 'b', 'second': 2, 'third': 'aa'}, {'first': 'c', 'second': 1, 'third': 'ss'}, ] headers = ['first', 'second', 'third'] - request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} + request_args = {'sEcho': '1', 'iSortCol_0': '1', 'iSortingCols': '1', + 'sSortDir_0': 'asc', 'iDisplayStart': '0', 'iDisplayLength': '3'} - test_page = ServerSideTable(rows_count, table_rows, headers, request_args).get_page() + test_page = ServerSideTable( + rows_count, table_rows, headers, request_args).get_page() self.assertEqual(test_page['sEcho'], '1') self.assertEqual(test_page['iTotalRecords'], 'nan') self.assertEqual(test_page['iTotalDisplayRecords'], '3') - self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, {'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) + self.assertEqual(test_page['data'], [{'first': 'b', 'second': 2, 'third': 'aa'}, { + 'first': 'c', 'second': 1, 'third': 'ss'}, {'first': 'd', 'second': 4, 'third': 'zz'}]) diff --git a/wqflask/tests/wqflask/show_trait/testSampleList.py b/wqflask/tests/wqflask/show_trait/testSampleList.py index 34c51e3e..305586ce 100644 --- a/wqflask/tests/wqflask/show_trait/testSampleList.py +++ b/wqflask/tests/wqflask/show_trait/testSampleList.py @@ -10,7 +10,8 @@ class TestSampleList(unittest.TestCase): characters_list = ["z", "f", "q", "s", "t", "a", "g"] names_list = ["temp1", "publish", "Sample", "Dataset"] - sorted_list_a=natural_sort(characters_list) - sorted_list_b=natural_sort(names_list) + sorted_list_a = natural_sort(characters_list) + sorted_list_b = natural_sort(names_list) self.assertEqual(sorted_list_a, ["a", "f", "g", "q", "s", "t", "z"]) - self.assertEqual(sorted_list_b,["Dataset", "Sample", "publish", "temp1"]) + self.assertEqual( + sorted_list_b, ["Dataset", "Sample", "publish", "temp1"]) diff --git a/wqflask/tests/wqflask/show_trait/test_show_trait.py b/wqflask/tests/wqflask/show_trait/test_show_trait.py index 8c866874..63df2ba5 100644 --- a/wqflask/tests/wqflask/show_trait/test_show_trait.py +++ b/wqflask/tests/wqflask/show_trait/test_show_trait.py @@ -72,7 +72,8 @@ class TestTraits(unittest.TestCase): mock_get.return_value = get_return_obj results = get_ncbi_summary(trait) mock_exists.assert_called_once() - mock_get.assert_called_once_with(f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") + mock_get.assert_called_once_with( + f"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id={trait.geneid}&retmode=json") self.assertEqual(results, "this is a summary of the geneid") @@ -242,7 +243,6 @@ class TestTraits(unittest.TestCase): self.assertEqual(get_genotype_scales(file_location), expected_results) mock_get_scales.assert_called_once_with(file_location) - @mock.patch("wqflask.show_trait.show_trait.locate_ignore_error") def test_get_scales_from_genofile_found(self, mock_ignore_location): """"add test for get scales from genofile where file is found""" diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index 61f408d2..9b2c6735 100644 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -34,7 +34,7 @@ import utility.corestats as corestats from base import webqtlConfig from utility.pillow_utils import draw_rotated_text import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) # ---- Define common colours ---- # BLUE = ImageColor.getrgb("blue") @@ -47,6 +47,7 @@ COUR_FILE = "./wqflask/static/fonts/courbd.ttf" TAHOMA_FILE = "./wqflask/static/fonts/tahoma.ttf" # ---- END: FONT FILES ---- # + def cformat(d, rank=0): 'custom string format' strD = "%2.6f" % d @@ -68,22 +69,24 @@ def cformat(d, rank=0): strD = '0.0' return strD + def frange(start, end=None, inc=1.0): "A faster range-like function that does accept float increments..." if end == None: end = start + 0.0 start = 0.0 else: - start += 0.0 # force it to be a float + start += 0.0 # force it to be a float count = int((end - start) / inc) if start + count * inc != end: - # Need to adjust the count. AFAICT, it always comes up one short. + # Need to adjust the count. AFAICT, it always comes up one short. count += 1 L = [start] * count for i in range(1, count): L[i] = start + i * inc return L + def find_outliers(vals): """Calculates the upper and lower bounds of a set of sample/case values @@ -119,154 +122,163 @@ def find_outliers(vals): # parameter: data is either object returned by reaper permutation function (called by MarkerRegressionPage.py) # or the first object returned by direct (pair-scan) permu function (called by DirectPlotPage.py) -def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLabel=None, YLabel=None, title=None, offset= (60, 20, 40, 40), zoom = 1): + + +def plotBar(canvas, data, barColor=BLUE, axesColor=BLACK, labelColor=BLACK, XLabel=None, YLabel=None, title=None, offset=(60, 20, 40, 40), zoom=1): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - if plotHeight<=0 or plotWidth<=0: - return + if plotHeight <= 0 or plotWidth <= 0: + return if len(data) < 2: - return + return max_D = max(data) min_D = min(data) - #add by NL 06-20-2011: fix the error: when max_D is infinite, log function in detScale will go wrong - if max_D == float('inf') or max_D>webqtlConfig.MAXLRS: - max_D=webqtlConfig.MAXLRS #maximum LRS value + # add by NL 06-20-2011: fix the error: when max_D is infinite, log function in detScale will go wrong + if max_D == float('inf') or max_D > webqtlConfig.MAXLRS: + max_D = webqtlConfig.MAXLRS # maximum LRS value xLow, xTop, stepX = detScale(min_D, max_D) - #reduce data - #ZS: Used to determine number of bins for permutation output - step = ceil((xTop-xLow)/50.0) + # reduce data + # ZS: Used to determine number of bins for permutation output + step = ceil((xTop - xLow) / 50.0) j = xLow dataXY = [] Count = [] while j <= xTop: - dataXY.append(j) - Count.append(0) - j += step + dataXY.append(j) + Count.append(0) + j += step for i, item in enumerate(data): - if item == float('inf') or item>webqtlConfig.MAXLRS: - item = webqtlConfig.MAXLRS #maximum LRS value - j = int((item-xLow)/step) - Count[j] += 1 + if item == float('inf') or item > webqtlConfig.MAXLRS: + item = webqtlConfig.MAXLRS # maximum LRS value + j = int((item - xLow) / step) + Count[j] += 1 - yLow, yTop, stepY=detScale(0, max(Count)) + yLow, yTop, stepY = detScale(0, max(Count)) - #draw data - xScale = plotWidth/(xTop-xLow) - yScale = plotHeight/(yTop-yLow) - barWidth = xScale*step + # draw data + xScale = plotWidth / (xTop - xLow) + yScale = plotHeight / (yTop - yLow) + barWidth = xScale * step for i, count in enumerate(Count): - if count: - xc = (dataXY[i]-xLow)*xScale+xLeftOffset - yc =-(count-yLow)*yScale+yTopOffset+plotHeight - im_drawer.rectangle( - xy=((xc+2, yc), (xc+barWidth-2, yTopOffset+plotHeight)), - outline=barColor, fill=barColor) - - #draw drawing region + if count: + xc = (dataXY[i] - xLow) * xScale + xLeftOffset + yc = -(count - yLow) * yScale + yTopOffset + plotHeight + im_drawer.rectangle( + xy=((xc + 2, yc), (xc + barWidth - 2, yTopOffset + plotHeight)), + outline=barColor, fill=barColor) + + # draw drawing region im_drawer.rectangle( - xy=((xLeftOffset, yTopOffset), (xLeftOffset+plotWidth, yTopOffset+plotHeight)) + xy=((xLeftOffset, yTopOffset), + (xLeftOffset + plotWidth, yTopOffset + plotHeight)) ) - #draw scale - scaleFont=ImageFont.truetype(font=COUR_FILE, size=11) - x=xLow - for i in range(int(stepX)+1): - xc=xLeftOffset+(x-xLow)*xScale - im_drawer.line( - xy=((xc, yTopOffset+plotHeight), (xc, yTopOffset+plotHeight+5)), - fill=axesColor) - strX = cformat(d=x, rank=0) - im_drawer.text( - text=strX, - xy=(xc-im_drawer.textsize(strX, font=scaleFont)[0]/2, - yTopOffset+plotHeight+14), font=scaleFont) - x+= (xTop - xLow)/stepX - - y=yLow - for i in range(int(stepY)+1): - yc=yTopOffset+plotHeight-(y-yLow)*yScale - im_drawer.line(xy=((xLeftOffset, yc), (xLeftOffset-5, yc)), fill=axesColor) - strY = "%d" %y - im_drawer.text( - text=strY, - xy=(xLeftOffset-im_drawer.textsize(strY, font=scaleFont)[0]-6, yc+5), - font=scaleFont) - y+= (yTop - yLow)/stepY - - #draw label - labelFont=ImageFont.truetype(font=TAHOMA_FILE, size=17) + # draw scale + scaleFont = ImageFont.truetype(font=COUR_FILE, size=11) + x = xLow + for i in range(int(stepX) + 1): + xc = xLeftOffset + (x - xLow) * xScale + im_drawer.line( + xy=((xc, yTopOffset + plotHeight), + (xc, yTopOffset + plotHeight + 5)), + fill=axesColor) + strX = cformat(d=x, rank=0) + im_drawer.text( + text=strX, + xy=(xc - im_drawer.textsize(strX, font=scaleFont)[0] / 2, + yTopOffset + plotHeight + 14), font=scaleFont) + x += (xTop - xLow) / stepX + + y = yLow + for i in range(int(stepY) + 1): + yc = yTopOffset + plotHeight - (y - yLow) * yScale + im_drawer.line( + xy=((xLeftOffset, yc), (xLeftOffset - 5, yc)), fill=axesColor) + strY = "%d" % y + im_drawer.text( + text=strY, + xy=(xLeftOffset - im_drawer.textsize(strY, + font=scaleFont)[0] - 6, yc + 5), + font=scaleFont) + y += (yTop - yLow) / stepY + + # draw label + labelFont = ImageFont.truetype(font=TAHOMA_FILE, size=17) if XLabel: - im_drawer.text( - text=XLabel, - xy=(xLeftOffset+( - plotWidth-im_drawer.textsize(XLabel, font=labelFont)[0])/2.0, - yTopOffset+plotHeight+yBottomOffset-10), - font=labelFont, fill=labelColor) + im_drawer.text( + text=XLabel, + xy=(xLeftOffset + ( + plotWidth - im_drawer.textsize(XLabel, font=labelFont)[0]) / 2.0, + yTopOffset + plotHeight + yBottomOffset - 10), + font=labelFont, fill=labelColor) if YLabel: draw_rotated_text(canvas, text=YLabel, xy=(19, - yTopOffset+plotHeight-( - plotHeight-im_drawer.textsize( - YLabel, font=labelFont)[0])/2.0), + yTopOffset + plotHeight - ( + plotHeight - im_drawer.textsize( + YLabel, font=labelFont)[0]) / 2.0), font=labelFont, fill=labelColor, angle=90) - labelFont=ImageFont.truetype(font=VERDANA_FILE, size=16) + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=16) if title: - im_drawer.text( - text=title, - xy=(xLeftOffset+(plotWidth-im_drawer.textsize( - title, font=labelFont)[0])/2.0, - 20), - font=labelFont, fill=labelColor) + im_drawer.text( + text=title, + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + title, font=labelFont)[0]) / 2.0, + 20), + font=labelFont, fill=labelColor) # This function determines the scale of the plot + + def detScaleOld(min, max): - if min>=max: + if min >= max: return None elif min == -1.0 and max == 1.0: return [-1.2, 1.2, 12] else: - a=max-min - b=floor(log10(a)) - c=pow(10.0, b) - if a < c*5.0: - c/=2.0 - #print a,b,c - low=c*floor(min/c) - high=c*ceil(max/c) - return [low, high, round((high-low)/c)] - -def detScale(min=0,max=0): - - if min>=max: + a = max - min + b = floor(log10(a)) + c = pow(10.0, b) + if a < c * 5.0: + c /= 2.0 + # print a,b,c + low = c * floor(min / c) + high = c * ceil(max / c) + return [low, high, round((high - low) / c)] + + +def detScale(min=0, max=0): + + if min >= max: return None elif min == -1.0 and max == 1.0: return [-1.2, 1.2, 12] else: - a=max-min + a = max - min if max != 0: - max += 0.1*a + max += 0.1 * a if min != 0: - if min > 0 and min < 0.1*a: + if min > 0 and min < 0.1 * a: min = 0.0 else: - min -= 0.1*a - a=max-min - b=floor(log10(a)) - c=pow(10.0, b) - low=c*floor(min/c) - high=c*ceil(max/c) - n = round((high-low)/c) + min -= 0.1 * a + a = max - min + b = floor(log10(a)) + c = pow(10.0, b) + low = c * floor(min / c) + high = c * ceil(max / c) + n = round((high - low) / c) div = 2.0 while n < 5 or n > 15: if n < 5: @@ -274,23 +286,27 @@ def detScale(min=0,max=0): else: c *= div if div == 2.0: - div =5.0 + div = 5.0 else: - div =2.0 - low=c*floor(min/c) - high=c*ceil(max/c) - n = round((high-low)/c) + div = 2.0 + low = c * floor(min / c) + high = c * ceil(max / c) + n = round((high - low) / c) return [low, high, n] + def bluefunc(x): - return 1.0 / (1.0 + exp(-10*(x-0.6))) + return 1.0 / (1.0 + exp(-10 * (x - 0.6))) + def redfunc(x): - return 1.0 / (1.0 + exp(10*(x-0.5))) + return 1.0 / (1.0 + exp(10 * (x - 0.5))) + def greenfunc(x): - return 1 - pow(redfunc(x+0.2), 2) - bluefunc(x-0.3) + return 1 - pow(redfunc(x + 0.2), 2) - bluefunc(x - 0.3) + def colorSpectrum(n=100): multiple = 10 @@ -303,26 +319,27 @@ def colorSpectrum(n=100): return [ImageColor.getrgb("rgb(100%,0%,0%)"), ImageColor.getrgb("rgb(0%,100%,0%)"), ImageColor.getrgb("rgb(0%,0%,100%)")] - N = n*multiple - out = [None]*N; + N = n * multiple + out = [None] * N for i in range(N): - x = float(i)/N + x = float(i) / N out[i] = ImageColor.getrgb("rgb({}%,{}%,{}%".format( - *[int(i*100) for i in ( + *[int(i * 100) for i in ( redfunc(x), greenfunc(x), bluefunc(x))])) out2 = [out[0]] - step = N/float(n-1) + step = N / float(n - 1) j = 0 - for i in range(n-2): + for i in range(n - 2): j += step out2.append(out[int(j)]) out2.append(out[-1]) return out2 + def _test(): import doctest doctest.testmod() -if __name__=="__main__": +if __name__ == "__main__": _test() diff --git a/wqflask/utility/TDCell.py b/wqflask/utility/TDCell.py index 8de8e050..4b0f4b1d 100644 --- a/wqflask/utility/TDCell.py +++ b/wqflask/utility/TDCell.py @@ -33,9 +33,9 @@ class TDCell: def __init__(self, html="", text="", val=0.0): - self.html = html #html, for web page - self.text = text #text value, for output to a text file - self.val = val #sort by value + self.html = html # html, for web page + self.text = text # text value, for output to a text file + self.val = val # sort by value def __str__(self): return self.text diff --git a/wqflask/utility/THCell.py b/wqflask/utility/THCell.py index dde221b5..f533dcb8 100644 --- a/wqflask/utility/THCell.py +++ b/wqflask/utility/THCell.py @@ -33,10 +33,10 @@ class THCell: def __init__(self, html="", text="", sort=1, idx=-1): - self.html = html #html, for web page - self.text = text #Column text value - self.sort = sort #0: not sortable, 1: yes - self.idx = idx #sort by value + self.html = html # html, for web page + self.text = text # Column text value + self.sort = sort # 0: not sortable, 1: yes + self.idx = idx # sort by value def __str__(self): return self.text diff --git a/wqflask/utility/__init__.py b/wqflask/utility/__init__.py index 204ff59a..25273fa0 100644 --- a/wqflask/utility/__init__.py +++ b/wqflask/utility/__init__.py @@ -2,16 +2,18 @@ from pprint import pformat as pf # Todo: Move these out of __init__ -class Bunch(object): + +class Bunch: """Like a dictionary but using object notation""" - def __init__(self, **kw): - self.__dict__ = kw + + def __init__(self, **kw): + self.__dict__ = kw def __repr__(self): return pf(self.__dict__) -class Struct(object): +class Struct: '''The recursive class for building and representing objects with. From http://stackoverflow.com/a/6573827/1175849 @@ -30,6 +32,4 @@ class Struct(object): def __repr__(self): return '{%s}' % str(', '.join('%s : %s' % (k, repr(v)) for - (k, v) in list(self.__dict__.items()))) - - + (k, v) in list(self.__dict__.items()))) diff --git a/wqflask/utility/after.py b/wqflask/utility/after.py index 06091ecb..2b560e48 100644 --- a/wqflask/utility/after.py +++ b/wqflask/utility/after.py @@ -7,6 +7,7 @@ from flask import g from wqflask import app + def after_this_request(f): if not hasattr(g, 'after_request_callbacks'): g.after_request_callbacks = [] diff --git a/wqflask/utility/authentication_tools.py b/wqflask/utility/authentication_tools.py index 672b36d5..57dbf8ba 100644 --- a/wqflask/utility/authentication_tools.py +++ b/wqflask/utility/authentication_tools.py @@ -11,6 +11,7 @@ from utility.redis_tools import (get_redis_conn, add_resource) Redis = get_redis_conn() + def check_resource_availability(dataset, trait_id=None): # At least for now assume temporary entered traits are accessible if type(dataset) == str or dataset.type == "Temp": diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py index ea5a0ab6..6ece2f21 100644 --- a/wqflask/utility/benchmark.py +++ b/wqflask/utility/benchmark.py @@ -4,9 +4,10 @@ import time from utility.tools import LOG_BENCH from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) -class Bench(object): + +class Bench: entries = collections.OrderedDict() def __init__(self, name=None, write_output=LOG_BENCH): @@ -18,7 +19,8 @@ class Bench(object): if self.name: logger.debug("Starting benchmark: %s" % (self.name)) else: - logger.debug("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + logger.debug("Starting benchmark at: %s [%i]" % ( + inspect.stack()[1][3], inspect.stack()[1][2])) self.start_time = time.time() def __exit__(self, type, value, traceback): @@ -32,14 +34,16 @@ class Bench(object): logger.info(" %s took: %f seconds" % (name, (time_taken))) if self.name: - Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken + Bench.entries[self.name] = Bench.entries.get( + self.name, 0) + time_taken @classmethod def report(cls): - total_time = sum((time_taken for time_taken in list(cls.entries.values()))) + total_time = sum( + (time_taken for time_taken in list(cls.entries.values()))) print("\nTiming report\n") for name, time_taken in list(cls.entries.items()): - percent = int(round((time_taken/total_time) * 100)) + percent = int(round((time_taken / total_time) * 100)) print("[{}%] {}: {}".format(percent, name, time_taken)) print() diff --git a/wqflask/utility/chunks.py b/wqflask/utility/chunks.py index 9a7db102..484b5de6 100644 --- a/wqflask/utility/chunks.py +++ b/wqflask/utility/chunks.py @@ -26,6 +26,6 @@ def divide_into_chunks(the_list, number_chunks): chunks = [] for counter in range(0, length, chunksize): - chunks.append(the_list[counter:counter+chunksize]) + chunks.append(the_list[counter:counter + chunksize]) return chunks diff --git a/wqflask/utility/corestats.py b/wqflask/utility/corestats.py index 67ca3ad3..da0a21db 100644 --- a/wqflask/utility/corestats.py +++ b/wqflask/utility/corestats.py @@ -15,7 +15,9 @@ import sys -#ZS: Should switch to using some third party library for this; maybe scipy has an equivalent +# ZS: Should switch to using some third party library for this; maybe scipy has an equivalent + + class Stats: def __init__(self, sequence): @@ -63,7 +65,8 @@ class Stats: if len(self.sequence) < 1: value = None elif (percentile >= 100): - sys.stderr.write('ERROR: percentile must be < 100. you supplied: %s\n'% percentile) + sys.stderr.write( + 'ERROR: percentile must be < 100. you supplied: %s\n' % percentile) value = None else: element_idx = int(len(self.sequence) * (percentile / 100.0)) @@ -80,4 +83,4 @@ class Stats: # stats = corestats.Stats(sequence) # print stats.avg() # print stats.percentile(90) -# -------------------------------------------
\ No newline at end of file +# ------------------------------------------- diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index a5580811..eae3ba03 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -47,11 +47,14 @@ logger = getLogger(__name__) from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT + def test_elasticsearch_connection(): - es = Elasticsearch(['http://'+ELASTICSEARCH_HOST+":"+str(ELASTICSEARCH_PORT)+'/'], verify_certs=True) + es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \ + ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True) if not es.ping(): logger.warning("Elasticsearch is DOWN") + def get_elasticsearch_connection(for_user=True): """Return a connection to ES. Returns None on failure""" logger.info("get_elasticsearch_connection") @@ -77,6 +80,7 @@ def get_elasticsearch_connection(for_user=True): return es + def setup_users_index(es_connection): if es_connection: index_settings = { @@ -85,20 +89,24 @@ def setup_users_index(es_connection): "type": "keyword"}}} es_connection.indices.create(index='users', ignore=400) - es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") + es_connection.indices.put_mapping( + body=index_settings, index="users", doc_type="local") + def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) + def save_user(es, user, user_id): es_save_data(es, "users", "local", user, user_id) + def get_item_by_unique_column(es, column_name, column_value, index, doc_type): item_details = None try: response = es.search( - index = index, doc_type = doc_type, body = { - "query": { "match": { column_name: column_value } } + index=index, doc_type=doc_type, body={ + "query": {"match": {column_name: column_value}} }) if len(response["hits"]["hits"]) > 0: item_details = response["hits"]["hits"][0]["_source"] @@ -106,7 +114,8 @@ def get_item_by_unique_column(es, column_name, column_value, index, doc_type): pass return item_details + def es_save_data(es, index, doc_type, data_item, data_id,): from time import sleep es.create(index, doc_type, body=data_item, id=data_id) - sleep(1) # Delay 1 second to allow indexing + sleep(1) # Delay 1 second to allow indexing diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py index 50afea08..805d2ffe 100644 --- a/wqflask/utility/external.py +++ b/wqflask/utility/external.py @@ -4,6 +4,7 @@ import os import sys import subprocess + def shell(command): if subprocess.call(command, shell=True) != 0: - raise Exception("ERROR: failed on "+command) + raise Exception("ERROR: failed on " + command) diff --git a/wqflask/utility/gen_geno_ob.py b/wqflask/utility/gen_geno_ob.py index 81085ffe..e619b7b6 100644 --- a/wqflask/utility/gen_geno_ob.py +++ b/wqflask/utility/gen_geno_ob.py @@ -1,7 +1,8 @@ import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class genotype(object): + +class genotype: """ Replacement for reaper.Dataset so we can remove qtlreaper use while still generating mapping output figure """ @@ -18,7 +19,7 @@ class genotype(object): self.filler = False self.mb_exists = False - #ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary + # ZS: This is because I'm not sure if some files switch the column that contains Mb/cM positions; might be unnecessary self.cm_column = 2 self.mb_column = 3 @@ -36,14 +37,16 @@ class genotype(object): return len(self.chromosomes) def read_rdata_output(self, qtl_results): - #ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results - self.chromosomes = [] #ZS: Overwriting since the .geno file's contents are just placeholders + # ZS: This is necessary because R/qtl requires centimorgan marker positions, which it normally gets from the .geno file, but that doesn't exist for HET3-ITP (which only has RData), so it needs to read in the marker cM positions from the results + # ZS: Overwriting since the .geno file's contents are just placeholders + self.chromosomes = [] - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + this_chr = "" # ZS: This is so it can track when the chromosome changes as it iterates through markers chr_ob = None for marker in qtl_results: locus = Locus(self) - if (str(marker['chr']) != this_chr) and this_chr != "X": #ZS: This is really awkward but works as a temporary fix + # ZS: This is really awkward but works as a temporary fix + if (str(marker['chr']) != this_chr) and this_chr != "X": if this_chr != "": self.chromosomes.append(chr_ob) this_chr = str(marker['chr']) @@ -68,7 +71,7 @@ class genotype(object): with open(filename, 'r') as geno_file: lines = geno_file.readlines() - this_chr = "" #ZS: This is so it can track when the chromosome changes as it iterates through markers + this_chr = "" # ZS: This is so it can track when the chromosome changes as it iterates through markers chr_ob = None for line in lines: if line[0] == "#": @@ -119,7 +122,8 @@ class genotype(object): self.chromosomes.append(chr_ob) -class Chr(object): + +class Chr: def __init__(self, name, geno_ob): self.name = name self.loci = [] @@ -140,8 +144,9 @@ class Chr(object): def add_marker(self, marker_row): self.loci.append(Locus(self.geno_ob, marker_row)) -class Locus(object): - def __init__(self, geno_ob, marker_row = None): + +class Locus: + def __init__(self, geno_ob, marker_row=None): self.chr = None self.name = None self.cM = None @@ -153,9 +158,11 @@ class Locus(object): try: self.cM = float(marker_row[geno_ob.cm_column]) except: - self.cM = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 + self.cM = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else 0 try: - self.Mb = float(marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None + self.Mb = float( + marker_row[geno_ob.mb_column]) if geno_ob.mb_exists else None except: self.Mb = self.cM @@ -175,5 +182,5 @@ class Locus(object): for allele in marker_row[start_pos:]: if allele in list(geno_table.keys()): self.genotype.append(geno_table[allele]) - else: #ZS: Some genotype appears that isn't specified in the metadata, make it unknown + else: # ZS: Some genotype appears that isn't specified in the metadata, make it unknown self.genotype.append("U") diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py index 0b736176..86d9823e 100644 --- a/wqflask/utility/genofile_parser.py +++ b/wqflask/utility/genofile_parser.py @@ -12,88 +12,89 @@ import simplejson as json from pprint import pformat as pf -class Marker(object): - def __init__(self): - self.name = None - self.chr = None - self.cM = None - self.Mb = None - self.genotypes = [] +class Marker: + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] -class ConvertGenoFile(object): - def __init__(self, input_file): - self.mb_exists = False - self.cm_exists = False - self.markers = [] +class ConvertGenoFile: - self.latest_row_pos = None - self.latest_col_pos = None + def __init__(self, input_file): + self.mb_exists = False + self.cm_exists = False + self.markers = [] - self.latest_row_value = None - self.latest_col_value = None - self.input_fh = open(input_file) - print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") - self.haplotype_notation = { - '@mat': "1", - '@pat': "2", - '@het': "-999", - '@unk': "-999" - } - self.configurations = {} + self.latest_row_pos = None + self.latest_col_pos = None - def process_rows(self): - for self.latest_row_pos, row in enumerate(self.input_fh): - self.latest_row_value = row - # Take care of headers - if not row.strip(): - continue - if row.startswith('#'): - continue - if row.startswith('Chr'): - if 'Mb' in row.split(): - self.mb_exists = True - if 'cM' in row.split(): - self.cm_exists = True - skip = 2 + self.cm_exists + self.mb_exists - self.individuals = row.split()[skip:] - continue - if row.startswith('@'): - key, _separater, value = row.partition(':') - key = key.strip() - value = value.strip() - if key in self.haplotype_notation: - self.configurations[value] = self.haplotype_notation[key] - continue - if not len(self.configurations): - raise EmptyConfigurations - yield row + self.latest_row_value = None + self.latest_col_value = None + self.input_fh = open(input_file) + print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") + self.haplotype_notation = { + '@mat': "1", + '@pat': "2", + '@het': "-999", + '@unk': "-999" + } + self.configurations = {} - def process_csv(self): - for row in self.process_rows(): - row_items = row.split("\t") + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + skip = 2 + self.cm_exists + self.mb_exists + self.individuals = row.split()[skip:] + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row - this_marker = Marker() - this_marker.name = row_items[1] - this_marker.chr = row_items[0] - if self.cm_exists and self.mb_exists: - this_marker.cM = row_items[2] - this_marker.Mb = row_items[3] - genotypes = row_items[4:] - elif self.cm_exists: - this_marker.cM = row_items[2] - genotypes = row_items[3:] - elif self.mb_exists: - this_marker.Mb = row_items[2] - genotypes = row_items[3:] - else: - genotypes = row_items[2:] - for item_count, genotype in enumerate(genotypes): - if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) - else: - print("WARNING:", genotype.upper()) - this_marker.genotypes.append("NA") - self.markers.append(this_marker.__dict__) + def process_csv(self): + for row in self.process_rows(): + row_items = row.split("\t") + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append( + self.configurations[genotype.upper().strip()]) + else: + print("WARNING:", genotype.upper()) + this_marker.genotypes.append("NA") + self.markers.append(this_marker.__dict__) diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 7eb7f013..50e00421 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -4,20 +4,23 @@ from base.species import TheSpecies from utility import hmac -from flask import Flask, g +from flask import g import logging -logger = logging.getLogger(__name__ ) +logger = logging.getLogger(__name__) + def get_species_dataset_trait(self, start_vars): - #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" if "temp_trait" in list(start_vars.keys()): - if start_vars['temp_trait'] == "True": - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) - else: - self.dataset = data_set.create_dataset(start_vars['dataset']) + if start_vars['temp_trait'] == "True": + self.dataset = data_set.create_dataset( + dataset_name="Temp", + dataset_type="Temp", + group_name=start_vars['group']) + else: + self.dataset = data_set.create_dataset(start_vars['dataset']) else: - self.dataset = data_set.create_dataset(start_vars['dataset']) + self.dataset = data_set.create_dataset(start_vars['dataset']) logger.debug("After creating dataset") self.species = TheSpecies(dataset=self.dataset) logger.debug("After creating species") @@ -27,9 +30,6 @@ def get_species_dataset_trait(self, start_vars): get_qtl_info=True) logger.debug("After creating trait") - #if read_genotype: - #self.dataset.group.read_genotype_file() - #self.genotype = self.dataset.group.genotype def get_trait_db_obs(self, trait_db_list): if isinstance(trait_db_list, str): @@ -39,31 +39,34 @@ def get_trait_db_obs(self, trait_db_list): for trait in trait_db_list: data, _separator, hmac_string = trait.rpartition(':') data = data.strip() - assert hmac_string==hmac.hmac_creation(data), "Data tampering?" - trait_name, dataset_name = data.split(":") + assert hmac_string == hmac.hmac_creation(data), "Data tampering?" + trait_name, dataset_name = data.split(":")[:2] if dataset_name == "Temp": - dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2]) + dataset_ob = data_set.create_dataset( + dataset_name=dataset_name, dataset_type="Temp", + group_name=trait_name.split("_")[2]) else: dataset_ob = data_set.create_dataset(dataset_name) trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) + name=trait_name, + cellid=None) if trait_ob: self.trait_list.append((trait_ob, dataset_ob)) -def get_species_groups(): - - species_query = "SELECT SpeciesId, MenuName FROM Species" - species_ids_and_names = g.db.execute(species_query).fetchall() - species_and_groups = [] - for species_id, species_name in species_ids_and_names: - this_species_groups = {} - this_species_groups['species'] = species_name - groups_query = "SELECT InbredSetName FROM InbredSet WHERE SpeciesId = %s" % (species_id) - groups = [group[0] for group in g.db.execute(groups_query).fetchall()] - - this_species_groups['groups'] = groups - species_and_groups.append(this_species_groups) +def get_species_groups(): + """Group each species into a group""" + _menu = {} - return species_and_groups + for species, group_name in g.db.execute( + "SELECT s.MenuName, i.InbredSetName FROM InbredSet i " + "INNER JOIN Species s ON s.SpeciesId = i.SpeciesId " + "ORDER BY i.SpeciesId ASC, i.Name ASC").fetchall(): + if species in _menu: + if _menu.get(species): + _menu = _menu[species].append(group_name) + else: + _menu[species] = [group_name] + return [{"species": key, + "groups": value} for key, value in + list(_menu.items())] diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py index e904eb94..d706e32a 100644 --- a/wqflask/utility/logger.py +++ b/wqflask/utility/logger.py @@ -35,6 +35,7 @@ import datetime from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL + class GNLogger: """A logger class with some additional functionality, such as multiple parameter logging, SQL logging, timing, colors, and lazy @@ -49,14 +50,14 @@ class GNLogger: """Set the undelying log level""" self.logger.setLevel(value) - def debug(self,*args): + def debug(self, *args): """Call logging.debug for multiple args. Use (lazy) debugf and level=num to filter on LOG_LEVEL_DEBUG. """ self.collect(self.logger.debug, *args) - def debug20(self,*args): + def debug20(self, *args): """Call logging.debug for multiple args. Use level=num to filter on LOG_LEVEL_DEBUG (NYI). @@ -65,29 +66,29 @@ LOG_LEVEL_DEBUG (NYI). if self.logger.getEffectiveLevel() < 20: self.collect(self.logger.debug, *args) - def info(self,*args): + def info(self, *args): """Call logging.info for multiple args""" self.collect(self.logger.info, *args) - def warning(self,*args): + def warning(self, *args): """Call logging.warning for multiple args""" self.collect(self.logger.warning, *args) # self.logger.warning(self.collect(*args)) - def error(self,*args): + def error(self, *args): """Call logging.error for multiple args""" now = datetime.datetime.utcnow() time_str = now.strftime('%H:%M:%S UTC %Y%m%d') - l = [time_str]+list(args) + l = [time_str] + list(args) self.collect(self.logger.error, *l) - def infof(self,*args): + def infof(self, *args): """Call logging.info for multiple args lazily""" # only evaluate function when logging if self.logger.getEffectiveLevel() < 30: self.collectf(self.logger.debug, *args) - def debugf(self,level=0,*args): + def debugf(self, level=0, *args): """Call logging.debug for multiple args lazily and handle LOG_LEVEL_DEBUG correctly @@ -97,7 +98,7 @@ LOG_LEVEL_DEBUG (NYI). if self.logger.getEffectiveLevel() < 20: self.collectf(self.logger.debug, *args) - def sql(self, sqlcommand, fun = None): + def sql(self, sqlcommand, fun=None): """Log SQL command, optionally invoking a timed fun""" if LOG_SQL: caller = stack()[1][3] @@ -110,11 +111,11 @@ LOG_LEVEL_DEBUG (NYI). self.info(result) return result - def collect(self,fun,*args): + def collect(self, fun, *args): """Collect arguments and use fun to output""" - out = "."+stack()[2][3] + out = "." + stack()[2][3] for a in args: - if len(out)>1: + if len(out) > 1: out += ": " if isinstance(a, str): out = out + a @@ -122,11 +123,11 @@ LOG_LEVEL_DEBUG (NYI). out = out + pf(a, width=160) fun(out) - def collectf(self,fun,*args): + def collectf(self, fun, *args): """Collect arguments and use fun to output one by one""" - out = "."+stack()[2][3] + out = "." + stack()[2][3] for a in args: - if len(out)>1: + if len(out) > 1: out += ": " if isfunction(a): out += a() @@ -139,7 +140,9 @@ LOG_LEVEL_DEBUG (NYI). # Get the module logger. You can override log levels at the # module level -def getLogger(name, level = None): + + +def getLogger(name, level=None): gnlogger = GNLogger(name) logger = gnlogger.logger @@ -148,5 +151,6 @@ def getLogger(name, level = None): else: logger.setLevel(LOG_LEVEL) - logger.info("Log level of "+name+" set to "+logging.getLevelName(logger.getEffectiveLevel())) + logger.info("Log level of " + name + " set to " + \ + logging.getLevelName(logger.getEffectiveLevel())) return gnlogger diff --git a/wqflask/utility/pillow_utils.py b/wqflask/utility/pillow_utils.py index c486abba..5713e155 100644 --- a/wqflask/utility/pillow_utils.py +++ b/wqflask/utility/pillow_utils.py @@ -3,12 +3,14 @@ from PIL import Image, ImageColor, ImageDraw, ImageFont from utility.tools import TEMPDIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) BLACK = ImageColor.getrgb("black") WHITE = ImageColor.getrgb("white") # def draw_rotated_text(canvas: Image, text: str, font: ImageFont, xy: tuple, fill: ImageColor=BLACK, angle: int=-90): + + def draw_rotated_text(canvas, text, font, xy, fill=BLACK, angle=-90): # type: (Image, str, ImageFont, tuple, ImageColor, int) """Utility function draw rotated text""" @@ -20,6 +22,8 @@ def draw_rotated_text(canvas, text, font, xy, fill=BLACK, angle=-90): canvas.paste(im=tmp_img2, box=tuple([int(i) for i in xy])) # def draw_open_polygon(canvas: Image, xy: tuple, fill: ImageColor=WHITE, outline: ImageColor=BLACK): + + def draw_open_polygon(canvas, xy, fill=None, outline=BLACK, width=0): # type: (Image, tuple, ImageColor, ImageColor) draw_ctx = ImageDraw.Draw(canvas) diff --git a/wqflask/utility/redis_tools.py b/wqflask/utility/redis_tools.py index 8052035f..96a4be12 100644 --- a/wqflask/utility/redis_tools.py +++ b/wqflask/utility/redis_tools.py @@ -133,8 +133,10 @@ def get_user_groups(user_id): for key in groups_list: try: group_ob = json.loads(groups_list[key]) - group_admins = set([this_admin.encode('utf-8') if this_admin else None for this_admin in group_ob['admins']]) - group_members = set([this_member.encode('utf-8') if this_member else None for this_member in group_ob['members']]) + group_admins = set([this_admin.encode( + 'utf-8') if this_admin else None for this_admin in group_ob['admins']]) + group_members = set([this_member.encode( + 'utf-8') if this_member else None for this_member in group_ob['members']]) if user_id in group_admins: admin_group_ids.append(group_ob['id']) elif user_id in group_members: @@ -203,7 +205,8 @@ def get_groups_like_unique_column(column_name, column_value): if column_value in group_info[column_name]: matched_groups.append(group_info) else: - matched_groups.append(load_json_from_redis(group_list, column_value)) + matched_groups.append( + load_json_from_redis(group_list, column_value)) return matched_groups diff --git a/wqflask/utility/startup_config.py b/wqflask/utility/startup_config.py index f1aaebb6..56d0af6f 100644 --- a/wqflask/utility/startup_config.py +++ b/wqflask/utility/startup_config.py @@ -1,39 +1,40 @@ from wqflask import app -from utility.tools import WEBSERVER_MODE, show_settings, get_setting_int, get_setting, get_setting_bool -import utility.logger -logger = utility.logger.getLogger(__name__ ) +from utility.tools import WEBSERVER_MODE +from utility.tools import show_settings +from utility.tools import get_setting_int +from utility.tools import get_setting +from utility.tools import get_setting_bool -BLUE = '\033[94m' + +BLUE = '\033[94m' GREEN = '\033[92m' -BOLD = '\033[1m' -ENDC = '\033[0m' +BOLD = '\033[1m' +ENDC = '\033[0m' + def app_config(): app.config['SESSION_TYPE'] = 'filesystem' if not app.config.get('SECRET_KEY'): import os app.config['SECRET_KEY'] = str(os.urandom(24)) - mode = WEBSERVER_MODE if mode == "DEV" or mode == "DEBUG": app.config['TEMPLATES_AUTO_RELOAD'] = True - # if mode == "DEBUG": - # app.config['EXPLAIN_TEMPLATE_LOADING'] = True <--- use overriding app param instead + print("==========================================") + show_settings() port = get_setting_int("SERVER_PORT") if get_setting_bool("USE_GN_SERVER"): - print(("GN2 API server URL is ["+BLUE+get_setting("GN_SERVER_URL")+ENDC+"]")) + print(f"GN2 API server URL is [{BLUE}GN_SERVER_URL{ENDC}]") import requests page = requests.get(get_setting("GN_SERVER_URL")) if page.status_code != 200: raise Exception("API server not found!") - - # import utility.elasticsearch_tools as es - # es.test_elasticsearch_connection() - - print(("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % (BLUE, str(port), ENDC, get_setting("WEBSERVER_URL")))) + print(f"GN2 is running. Visit {BLUE}" + f"[http://localhost:{str(port)}/{ENDC}]" + f"({get_setting('WEBSERVER_URL')})") diff --git a/wqflask/utility/svg.py b/wqflask/utility/svg.py index b92cc2d1..eddb97da 100644 --- a/wqflask/utility/svg.py +++ b/wqflask/utility/svg.py @@ -172,7 +172,7 @@ def _viewboxlist(a): """formats a tuple""" s = '' for e in a: - s += str(e)+' ' + s += str(e) + ' ' return s @@ -189,7 +189,7 @@ class pathdata: def __init__(self, x=None, y=None): self.path = [] if x is not None and y is not None: - self.path.append('M '+str(x)+' '+str(y)) + self.path.append('M ' + str(x) + ' ' + str(y)) def closepath(self): """ends the path""" @@ -197,79 +197,83 @@ class pathdata: def move(self, x, y): """move to absolute""" - self.path.append('M '+str(x)+' '+str(y)) + self.path.append('M ' + str(x) + ' ' + str(y)) def relmove(self, x, y): """move to relative""" - self.path.append('m '+str(x)+' '+str(y)) + self.path.append('m ' + str(x) + ' ' + str(y)) def line(self, x, y): """line to absolute""" - self.path.append('L '+str(x)+' '+str(y)) + self.path.append('L ' + str(x) + ' ' + str(y)) def relline(self, x, y): """line to relative""" - self.path.append('l '+str(x)+' '+str(y)) + self.path.append('l ' + str(x) + ' ' + str(y)) def hline(self, x): """horizontal line to absolute""" - self.path.append('H'+str(x)) + self.path.append('H' + str(x)) def relhline(self, x): """horizontal line to relative""" - self.path.append('h'+str(x)) + self.path.append('h' + str(x)) def vline(self, y): """verical line to absolute""" - self.path.append('V'+str(y)) + self.path.append('V' + str(y)) def relvline(self, y): """vertical line to relative""" - self.path.append('v'+str(y)) + self.path.append('v' + str(y)) def bezier(self, x1, y1, x2, y2, x, y): """bezier with xy1 and xy2 to xy absolut""" - self.path.append('C'+str(x1)+','+str(y1)+' '+str(x2) + - ','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('C' + str(x1) + ',' + str(y1) + ' ' + str(x2) + + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) def relbezier(self, x1, y1, x2, y2, x, y): """bezier with xy1 and xy2 to xy relative""" - self.path.append('c'+str(x1)+','+str(y1)+' '+str(x2) + - ','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('c' + str(x1) + ',' + str(y1) + ' ' + str(x2) + + ',' + str(y2) + ' ' + str(x) + ',' + str(y)) def smbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy absolut""" - self.path.append('S'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('S' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def relsmbezier(self, x2, y2, x, y): """smooth bezier with xy2 to xy relative""" - self.path.append('s'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) + self.path.append('s' + str(x2) + ',' + str(y2) + \ + ' ' + str(x) + ',' + str(y)) def qbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy absolut""" - self.path.append('Q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) + self.path.append('Q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def relqbezier(self, x1, y1, x, y): """quadratic bezier with xy1 to xy relative""" - self.path.append('q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) + self.path.append('q' + str(x1) + ',' + str(y1) + \ + ' ' + str(x) + ',' + str(y)) def smqbezier(self, x, y): """smooth quadratic bezier to xy absolut""" - self.path.append('T'+str(x)+','+str(y)) + self.path.append('T' + str(x) + ',' + str(y)) def relsmqbezier(self, x, y): """smooth quadratic bezier to xy relative""" - self.path.append('t'+str(x)+','+str(y)) + self.path.append('t' + str(x) + ',' + str(y)) def ellarc(self, rx, ry, xrot, laf, sf, x, y): """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy absolut""" - self.path.append('A'+str(rx)+','+str(ry)+' '+str(xrot) + - ' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) + self.path.append('A' + str(rx) + ',' + str(ry) + ' ' + str(xrot) + + ' ' + str(laf) + ' ' + str(sf) + ' ' + str(x) + ' ' + str(y)) def relellarc(self, rx, ry, xrot, laf, sf, x, y): """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy relative""" - self.path.append('a'+str(rx)+','+str(ry)+' '+str(xrot) + - ' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) + self.path.append('a' + str(rx) + ',' + str(ry) + ' ' + str(xrot) + + ' ' + str(laf) + ' ' + str(sf) + ' ' + str(x) + ' ' + str(y)) def __repr__(self): return ' '.join(self.path) @@ -312,36 +316,36 @@ class SVGelement: self.elements.append(SVGelement) def toXml(self, level, f): - f.write('\t'*level) - f.write('<'+self.type) + f.write('\t' * level) + f.write('<' + self.type) for attkey in list(self.attributes.keys()): - f.write(' '+_escape(str(attkey))+'=' + - _quoteattr(str(self.attributes[attkey]))) + f.write(' ' + _escape(str(attkey)) + '=' + + _quoteattr(str(self.attributes[attkey]))) if self.namespace: - f.write(' xmlns="' + _escape(str(self.namespace)) + - '" xmlns:xlink="http://www.w3.org/1999/xlink"') + f.write(' xmlns="' + _escape(str(self.namespace)) + + '" xmlns:xlink="http://www.w3.org/1999/xlink"') if self.elements or self.text or self.cdata: f.write('>') if self.elements: f.write('\n') for element in self.elements: - element.toXml(level+1, f) + element.toXml(level + 1, f) if self.cdata: - f.write('\n'+'\t'*(level+1)+'<![CDATA[') + f.write('\n' + '\t' * (level + 1) + '<![CDATA[') for line in self.cdata.splitlines(): - f.write('\n'+'\t'*(level+2)+line) - f.write('\n'+'\t'*(level+1)+']]>\n') + f.write('\n' + '\t' * (level + 2) + line) + f.write('\n' + '\t' * (level + 1) + ']]>\n') if self.text: if isinstance(self.text, type('')): # If the text is only text f.write(_escape(str(self.text))) else: # If the text is a spannedtext class f.write(str(self.text)) if self.elements: - f.write('\t'*level+'</'+self.type+'>\n') + f.write('\t' * level + '</' + self.type + '>\n') elif self.text: - f.write('</'+self.type+'>\n') + f.write('</' + self.type + '>\n') elif self.cdata: - f.write('\t'*level+'</'+self.type+'>\n') + f.write('\t' * level + '</' + self.type + '>\n') else: f.write('/>\n') @@ -447,38 +451,41 @@ class rect(SVGelement): if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'rect', {'width':width,'height':height}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__( + self, 'rect', {'width': width, 'height': height}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + class ellipse(SVGelement): """e=ellipse(rx,ry,x,y,fill,stroke,stroke_width,**args) an ellipse is defined as a center and a x and y radius. """ - def __init__(self,cx=None,cy=None,rx=None,ry=None,fill=None,stroke=None,stroke_width=None,**args): - if rx==None or ry== None: + + def __init__(self, cx=None, cy=None, rx=None, ry=None, fill=None, stroke=None, stroke_width=None, **args): + if rx == None or ry == None: raise ValueError('both rx and ry are required') - SVGelement.__init__(self, 'ellipse', {'rx':rx,'ry':ry}, **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__(self, 'ellipse', {'rx': rx, 'ry': ry}, **args) + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width class circle(SVGelement): @@ -486,20 +493,22 @@ class circle(SVGelement): The circle creates an element using a x, y and radius values eg """ - def __init__(self,cx=None,cy=None,r=None,fill=None,stroke=None,stroke_width=None,**args): - if r==None: + + def __init__(self, cx=None, cy=None, r=None, fill=None, stroke=None, stroke_width=None, **args): + if r == None: raise ValueError('r is required') - SVGelement.__init__(self, 'circle', {'r':r}, **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if fill!=None: - self.attributes['fill']=fill - if stroke!=None: - self.attributes['stroke']=stroke - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width + SVGelement.__init__(self, 'circle', {'r': r}, **args) + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if fill != None: + self.attributes['fill'] = fill + if stroke != None: + self.attributes['stroke'] = stroke + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + class point(circle): """p=point(x,y,color) @@ -507,72 +516,83 @@ class point(circle): A point is defined as a circle with a size 1 radius. It may be more efficient to use a very small rectangle if you use many points because a circle is difficult to render. """ - def __init__(self,x,y,fill='black',**args): + + def __init__(self, x, y, fill='black', **args): circle.__init__(self, x, y, 1, fill, **args) + class line(SVGelement): """l=line(x1,y1,x2,y2,stroke,stroke_width,**args) A line is defined by a begin x,y pair and an end x,y pair """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,stroke=None,stroke_width=None,**args): + + def __init__(self, x1=None, y1=None, x2=None, y2=None, stroke=None, stroke_width=None, **args): SVGelement.__init__(self, 'line', **args) - if x1!=None: - self.attributes['x1']=x1 - if y1!=None: - self.attributes['y1']=y1 - if x2!=None: - self.attributes['x2']=x2 - if y2!=None: - self.attributes['y2']=y2 - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + if x1 != None: + self.attributes['x1'] = x1 + if y1 != None: + self.attributes['y1'] = y1 + if x2 != None: + self.attributes['x2'] = x2 + if y2 != None: + self.attributes['y2'] = y2 + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class polyline(SVGelement): """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) a polyline is defined by a list of xy pairs """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self, 'polyline', {'points':_xypointlist(points)}, **args) - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + + def __init__(self, points, fill=None, stroke=None, stroke_width=None, **args): + SVGelement.__init__(self, 'polyline', { + 'points': _xypointlist(points)}, **args) + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class polygon(SVGelement): """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) a polygon is defined by a list of xy pairs """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self, 'polygon', {'points':_xypointlist(points)}, **args) - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if stroke!=None: - self.attributes['stroke']=stroke + + def __init__(self, points, fill=None, stroke=None, stroke_width=None, **args): + SVGelement.__init__( + self, 'polygon', {'points': _xypointlist(points)}, **args) + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if stroke != None: + self.attributes['stroke'] = stroke + class path(SVGelement): """p=path(path,fill,stroke,stroke_width,**args) a path is defined by a path object and optional width, stroke and fillcolor """ - def __init__(self,pathdata,fill=None,stroke=None,stroke_width=None,id=None,**args): - SVGelement.__init__(self, 'path', {'d':str(pathdata)}, **args) - if stroke!=None: - self.attributes['stroke']=stroke - if fill!=None: - self.attributes['fill']=fill - if stroke_width!=None: - self.attributes['stroke-width']=stroke_width - if id!=None: - self.attributes['id']=id + + def __init__(self, pathdata, fill=None, stroke=None, stroke_width=None, id=None, **args): + SVGelement.__init__(self, 'path', {'d': str(pathdata)}, **args) + if stroke != None: + self.attributes['stroke'] = stroke + if fill != None: + self.attributes['fill'] = fill + if stroke_width != None: + self.attributes['stroke-width'] = stroke_width + if id != None: + self.attributes['id'] = id class text(SVGelement): @@ -580,20 +600,21 @@ class text(SVGelement): a text element can bge used for displaying text on the screen """ - def __init__(self,x=None,y=None,text=None,font_size=None,font_family=None,text_anchor=None,**args): + + def __init__(self, x=None, y=None, text=None, font_size=None, font_family=None, text_anchor=None, **args): SVGelement.__init__(self, 'text', **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if font_size!=None: - self.attributes['font-size']=font_size - if font_family!=None: - self.attributes['font-family']=font_family - if text!=None: - self.text=text - if text_anchor!=None: - self.attributes['text-anchor']=text_anchor + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if font_size != None: + self.attributes['font-size'] = font_size + if font_family != None: + self.attributes['font-family'] = font_family + if text != None: + self.text = text + if text_anchor != None: + self.attributes['text-anchor'] = text_anchor class textpath(SVGelement): @@ -601,10 +622,12 @@ class textpath(SVGelement): a textpath places a text on a path which is referenced by a link. """ - def __init__(self,link,text=None,**args): - SVGelement.__init__(self, 'textPath', {'xlink:href':link}, **args) - if text!=None: - self.text=text + + def __init__(self, link, text=None, **args): + SVGelement.__init__(self, 'textPath', {'xlink:href': link}, **args) + if text != None: + self.text = text + class pattern(SVGelement): """p=pattern(x,y,width,height,patternUnits,**args) @@ -613,18 +636,20 @@ class pattern(SVGelement): graphic object which can be replicated ("tiled") at fixed intervals in x and y to cover the areas to be painted. """ - def __init__(self,x=None,y=None,width=None,height=None,patternUnits=None,**args): + + def __init__(self, x=None, y=None, width=None, height=None, patternUnits=None, **args): SVGelement.__init__(self, 'pattern', **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height - if patternUnits!=None: - self.attributes['patternUnits']=patternUnits + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height + if patternUnits != None: + self.attributes['patternUnits'] = patternUnits + class title(SVGelement): """t=title(text,**args) @@ -632,10 +657,12 @@ class title(SVGelement): a title is a text element. The text is displayed in the title bar add at least one to the root svg element """ - def __init__(self,text=None,**args): + + def __init__(self, text=None, **args): SVGelement.__init__(self, 'title', **args) - if text!=None: - self.text=text + if text != None: + self.text = text + class description(SVGelement): """d=description(text,**args) @@ -643,10 +670,12 @@ class description(SVGelement): a description can be added to any element and is used for a tooltip Add this element before adding other elements. """ - def __init__(self,text=None,**args): + + def __init__(self, text=None, **args): SVGelement.__init__(self, 'desc', **args) - if text!=None: - self.text=text + if text != None: + self.text = text + class lineargradient(SVGelement): """lg=lineargradient(x1,y1,x2,y2,id,**args) @@ -654,18 +683,20 @@ class lineargradient(SVGelement): defines a lineargradient using two xy pairs. stop elements van be added to define the gradient colors. """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,id=None,**args): + + def __init__(self, x1=None, y1=None, x2=None, y2=None, id=None, **args): SVGelement.__init__(self, 'linearGradient', **args) - if x1!=None: - self.attributes['x1']=x1 - if y1!=None: - self.attributes['y1']=y1 - if x2!=None: - self.attributes['x2']=x2 - if y2!=None: - self.attributes['y2']=y2 - if id!=None: - self.attributes['id']=id + if x1 != None: + self.attributes['x1'] = x1 + if y1 != None: + self.attributes['y1'] = y1 + if x2 != None: + self.attributes['x2'] = x2 + if y2 != None: + self.attributes['y2'] = y2 + if id != None: + self.attributes['id'] = id + class radialgradient(SVGelement): """rg=radialgradient(cx,cy,r,fx,fy,id,**args) @@ -673,38 +704,43 @@ class radialgradient(SVGelement): defines a radial gradient using a outer circle which are defined by a cx,cy and r and by using a focalpoint. stop elements van be added to define the gradient colors. """ - def __init__(self,cx=None,cy=None,r=None,fx=None,fy=None,id=None,**args): + + def __init__(self, cx=None, cy=None, r=None, fx=None, fy=None, id=None, **args): SVGelement.__init__(self, 'radialGradient', **args) - if cx!=None: - self.attributes['cx']=cx - if cy!=None: - self.attributes['cy']=cy - if r!=None: - self.attributes['r']=r - if fx!=None: - self.attributes['fx']=fx - if fy!=None: - self.attributes['fy']=fy - if id!=None: - self.attributes['id']=id + if cx != None: + self.attributes['cx'] = cx + if cy != None: + self.attributes['cy'] = cy + if r != None: + self.attributes['r'] = r + if fx != None: + self.attributes['fx'] = fx + if fy != None: + self.attributes['fy'] = fy + if id != None: + self.attributes['id'] = id + class stop(SVGelement): """st=stop(offset,stop_color,**args) Puts a stop color at the specified radius """ - def __init__(self,offset,stop_color=None,**args): - SVGelement.__init__(self, 'stop', {'offset':offset}, **args) - if stop_color!=None: - self.attributes['stop-color']=stop_color + + def __init__(self, offset, stop_color=None, **args): + SVGelement.__init__(self, 'stop', {'offset': offset}, **args) + if stop_color != None: + self.attributes['stop-color'] = stop_color + class style(SVGelement): """st=style(type,cdata=None,**args) Add a CDATA element to this element for defing in line stylesheets etc.. """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self, 'style', {'type':type}, cdata=cdata, **args) + + def __init__(self, type, cdata=None, **args): + SVGelement.__init__(self, 'style', {'type': type}, cdata=cdata, **args) class image(SVGelement): @@ -712,22 +748,26 @@ class image(SVGelement): adds an image to the drawing. Supported formats are .png, .jpg and .svg. """ - def __init__(self,url,x=None,y=None,width=None,height=None,**args): - if width==None or height==None: + + def __init__(self, url, x=None, y=None, width=None, height=None, **args): + if width == None or height == None: raise ValueError('both height and width are required') - SVGelement.__init__(self, 'image', {'xlink:href':url,'width':width,'height':height}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y + SVGelement.__init__( + self, 'image', {'xlink:href': url, 'width': width, 'height': height}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + class cursor(SVGelement): """c=cursor(url,**args) defines a custom cursor for a element or a drawing """ - def __init__(self,url,**args): - SVGelement.__init__(self, 'cursor', {'xlink:href':url}, **args) + + def __init__(self, url, **args): + SVGelement.__init__(self, 'cursor', {'xlink:href': url}, **args) class marker(SVGelement): @@ -736,20 +776,22 @@ class marker(SVGelement): defines a marker which can be used as an endpoint for a line or other pathtypes add an element to it which should be used as a marker. """ - def __init__(self,id=None,viewBox=None,refx=None,refy=None,markerWidth=None,markerHeight=None,**args): + + def __init__(self, id=None, viewBox=None, refx=None, refy=None, markerWidth=None, markerHeight=None, **args): SVGelement.__init__(self, 'marker', **args) - if id!=None: - self.attributes['id']=id - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if refx!=None: - self.attributes['refX']=refx - if refy!=None: - self.attributes['refY']=refy - if markerWidth!=None: - self.attributes['markerWidth']=markerWidth - if markerHeight!=None: - self.attributes['markerHeight']=markerHeight + if id != None: + self.attributes['id'] = id + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + if refx != None: + self.attributes['refX'] = refx + if refy != None: + self.attributes['refY'] = refy + if markerWidth != None: + self.attributes['markerWidth'] = markerWidth + if markerHeight != None: + self.attributes['markerHeight'] = markerHeight + class group(SVGelement): """g=group(id,**args) @@ -757,10 +799,12 @@ class group(SVGelement): a group is defined by an id and is used to contain elements g.addElement(SVGelement) """ - def __init__(self,id=None,**args): + + def __init__(self, id=None, **args): SVGelement.__init__(self, 'g', **args) - if id!=None: - self.attributes['id']=id + if id != None: + self.attributes['id'] = id + class symbol(SVGelement): """sy=symbol(id,viewbox,**args) @@ -771,21 +815,24 @@ class symbol(SVGelement): sy.addElement(SVGelement) """ - def __init__(self,id=None,viewBox=None,**args): + def __init__(self, id=None, viewBox=None, **args): SVGelement.__init__(self, 'symbol', **args) - if id!=None: - self.attributes['id']=id - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) + if id != None: + self.attributes['id'] = id + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + class defs(SVGelement): """d=defs(**args) container for defining elements """ - def __init__(self,**args): + + def __init__(self, **args): SVGelement.__init__(self, 'defs', **args) + class switch(SVGelement): """sw=switch(**args) @@ -793,7 +840,8 @@ class switch(SVGelement): requiredFeatures, requiredExtensions and systemLanguage. Refer to the SVG specification for details. """ - def __init__(self,**args): + + def __init__(self, **args): SVGelement.__init__(self, 'switch', **args) @@ -802,17 +850,18 @@ class use(SVGelement): references a symbol by linking to its id and its position, height and width """ - def __init__(self,link,x=None,y=None,width=None,height=None,**args): - SVGelement.__init__(self, 'use', {'xlink:href':link}, **args) - if x!=None: - self.attributes['x']=x - if y!=None: - self.attributes['y']=y - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height + def __init__(self, link, x=None, y=None, width=None, height=None, **args): + SVGelement.__init__(self, 'use', {'xlink:href': link}, **args) + if x != None: + self.attributes['x'] = x + if y != None: + self.attributes['y'] = y + + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height class link(SVGelement): @@ -821,17 +870,21 @@ class link(SVGelement): a link is defined by a hyperlink. add elements which have to be linked a.addElement(SVGelement) """ - def __init__(self,link='',**args): - SVGelement.__init__(self, 'a', {'xlink:href':link}, **args) + + def __init__(self, link='', **args): + SVGelement.__init__(self, 'a', {'xlink:href': link}, **args) + class view(SVGelement): """v=view(id,**args) a view can be used to create a view with different attributes""" - def __init__(self,id=None,**args): + + def __init__(self, id=None, **args): SVGelement.__init__(self, 'view', **args) - if id!=None: - self.attributes['id']=id + if id != None: + self.attributes['id'] = id + class script(SVGelement): """sc=script(type,type,cdata,**args) @@ -839,78 +892,94 @@ class script(SVGelement): adds a script element which contains CDATA to the SVG drawing """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self, 'script', {'type':type}, cdata=cdata, **args) + + def __init__(self, type, cdata=None, **args): + SVGelement.__init__( + self, 'script', {'type': type}, cdata=cdata, **args) + class animate(SVGelement): """an=animate(attribute,from,to,during,**args) animates an attribute. """ - def __init__(self,attribute,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animate', {'attributeName':attribute}, **args) - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + + def __init__(self, attribute, fr=None, to=None, dur=None, **args): + SVGelement.__init__( + self, 'animate', {'attributeName': attribute}, **args) + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + class animateMotion(SVGelement): """an=animateMotion(pathdata,dur,**args) animates a SVGelement over the given path in dur seconds """ - def __init__(self,pathdata,dur,**args): + + def __init__(self, pathdata, dur, **args): SVGelement.__init__(self, 'animateMotion', **args) - if pathdata!=None: - self.attributes['path']=str(pathdata) - if dur!=None: - self.attributes['dur']=dur + if pathdata != None: + self.attributes['path'] = str(pathdata) + if dur != None: + self.attributes['dur'] = dur + class animateTransform(SVGelement): """antr=animateTransform(type,from,to,dur,**args) transform an element from and to a value. """ - def __init__(self,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animateTransform', {'attributeName':'transform'}, **args) + + def __init__(self, type=None, fr=None, to=None, dur=None, **args): + SVGelement.__init__(self, 'animateTransform', { + 'attributeName': 'transform'}, **args) # As far as I know the attributeName is always transform - if type!=None: - self.attributes['type']=type - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + if type != None: + self.attributes['type'] = type + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + + class animateColor(SVGelement): """ac=animateColor(attribute,type,from,to,dur,**args) Animates the color of a element """ - def __init__(self,attribute,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self, 'animateColor', {'attributeName':attribute}, **args) - if type!=None: - self.attributes['type']=type - if fr!=None: - self.attributes['from']=fr - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + + def __init__(self, attribute, type=None, fr=None, to=None, dur=None, **args): + SVGelement.__init__(self, 'animateColor', { + 'attributeName': attribute}, **args) + if type != None: + self.attributes['type'] = type + if fr != None: + self.attributes['from'] = fr + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur + + class set(SVGelement): """st=set(attribute,to,during,**args) sets an attribute to a value for a """ - def __init__(self,attribute,to=None,dur=None,**args): - SVGelement.__init__(self, 'set', {'attributeName':attribute}, **args) - if to!=None: - self.attributes['to']=to - if dur!=None: - self.attributes['dur']=dur + def __init__(self, attribute, to=None, dur=None, **args): + SVGelement.__init__(self, 'set', {'attributeName': attribute}, **args) + if to != None: + self.attributes['to'] = to + if dur != None: + self.attributes['dur'] = dur class svg(SVGelement): @@ -928,15 +997,17 @@ class svg(SVGelement): d.setSVG(s) d.toXml() """ - def __init__(self,viewBox=None, width=None, height=None,**args): + + def __init__(self, viewBox=None, width=None, height=None, **args): SVGelement.__init__(self, 'svg', **args) - if viewBox!=None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if width!=None: - self.attributes['width']=width - if height!=None: - self.attributes['height']=height - self.namespace="http://www.w3.org/2000/svg" + if viewBox != None: + self.attributes['viewBox'] = _viewboxlist(viewBox) + if width != None: + self.attributes['width'] = width + if height != None: + self.attributes['height'] = height + self.namespace = "http://www.w3.org/2000/svg" + class drawing: """d=drawing() @@ -950,29 +1021,32 @@ class drawing: """ def __init__(self, entity={}): - self.svg=None + self.svg = None self.entity = entity + def setSVG(self, svg): - self.svg=svg + self.svg = svg # Voeg een element toe aan de grafiek toe. - if use_dom_implementation==0: - def toXml(self, filename='',compress=False): + if use_dom_implementation == 0: + def toXml(self, filename='', compress=False): import io - xml=io.StringIO() + xml = io.StringIO() xml.write("<?xml version='1.0' encoding='UTF-8'?>\n") - xml.write("<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") + xml.write( + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.0//EN\" \"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd\"") if self.entity: xml.write(" [\n") for item in list(self.entity.keys()): - xml.write("<!ENTITY %s \"%s\">\n" % (item, self.entity[item])) + xml.write("<!ENTITY %s \"%s\">\n" % + (item, self.entity[item])) xml.write("]") xml.write(">\n") self.svg.toXml(0, xml) if not filename: if compress: import gzip - f=io.StringIO() - zf=gzip.GzipFile(fileobj=f, mode='wb') + f = io.StringIO() + zf = gzip.GzipFile(fileobj=f, mode='wb') zf.write(xml.getvalue()) zf.close() f.seek(0) @@ -980,57 +1054,62 @@ class drawing: else: return xml.getvalue() else: - if filename[-4:]=='svgz': + if filename[-4:] == 'svgz': import gzip - f=gzip.GzipFile(filename=filename, mode="wb", compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode="wb", compresslevel=9) f.write(xml.getvalue()) f.close() else: - f=file(filename, 'w') + f = file(filename, 'w') f.write(xml.getvalue()) f.close() else: - def toXml(self,filename='',compress=False): + def toXml(self, filename='', compress=False): """drawing.toXml() ---->to the screen drawing.toXml(filename)---->to the file writes a svg drawing to the screen or to a file compresses if filename ends with svgz or if compress is true """ - doctype = implementation.createDocumentType('svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') + doctype = implementation.createDocumentType( + 'svg', "-//W3C//DTD SVG 1.0//EN""", 'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') global root # root is defined global so it can be used by the appender. Its also possible to use it as an arugument but # that is a bit messy. - root=implementation.createDocument(None, None, doctype) + root = implementation.createDocument(None, None, doctype) # Create the xml document. global appender + def appender(element, elementroot): """This recursive function appends elements to an element and sets the attributes and type. It stops when alle elements have been appended""" if element.namespace: - e=root.createElementNS(element.namespace, element.type) + e = root.createElementNS(element.namespace, element.type) else: - e=root.createElement(element.type) + e = root.createElement(element.type) if element.text: - textnode=root.createTextNode(element.text) + textnode = root.createTextNode(element.text) e.appendChild(textnode) - for attribute in list(element.attributes.keys()): #in element.attributes is supported from python 2.2 - e.setAttribute(attribute, str(element.attributes[attribute])) + # in element.attributes is supported from python 2.2 + for attribute in list(element.attributes.keys()): + e.setAttribute(attribute, str( + element.attributes[attribute])) if element.elements: for el in element.elements: - e=appender(el, e) + e = appender(el, e) elementroot.appendChild(e) return elementroot - root=appender(self.svg, root) + root = appender(self.svg, root) if not filename: import io - xml=io.StringIO() + xml = io.StringIO() PrettyPrint(root, xml) if compress: import gzip - f=io.StringIO() - zf=gzip.GzipFile(fileobj=f, mode='wb') + f = io.StringIO() + zf = gzip.GzipFile(fileobj=f, mode='wb') zf.write(xml.getvalue()) zf.close() f.seek(0) @@ -1039,63 +1118,67 @@ class drawing: return xml.getvalue() else: try: - if filename[-4:]=='svgz': + if filename[-4:] == 'svgz': import gzip import io - xml=io.StringIO() + xml = io.StringIO() PrettyPrint(root, xml) - f=gzip.GzipFile(filename=filename, mode='wb', compresslevel=9) + f = gzip.GzipFile(filename=filename, + mode='wb', compresslevel=9) f.write(xml.getvalue()) f.close() else: - f=open(filename, 'w') + f = open(filename, 'w') PrettyPrint(root, f) f.close() except: print(("Cannot write SVG file: " + filename)) + def validate(self): try: import xml.parsers.xmlproc.xmlval except: - raise exceptions.ImportError('PyXml is required for validating SVG') - svg=self.toXml() - xv=xml.parsers.xmlproc.xmlval.XMLValidator() + raise exceptions.ImportError( + 'PyXml is required for validating SVG') + svg = self.toXml() + xv = xml.parsers.xmlproc.xmlval.XMLValidator() try: xv.feed(svg) except: raise Exception("SVG is not well formed, see messages above") else: print("SVG well formed") -if __name__=='__main__': - d=drawing() - s=svg((0, 0, 100, 100)) - r=rect(-100, -100, 300, 300, 'cyan') +if __name__ == '__main__': + + d = drawing() + s = svg((0, 0, 100, 100)) + r = rect(-100, -100, 300, 300, 'cyan') s.addElement(r) - t=title('SVGdraw Demo') + t = title('SVGdraw Demo') s.addElement(t) - g=group('animations') - e=ellipse(0, 0, 5, 2) + g = group('animations') + e = ellipse(0, 0, 5, 2) g.addElement(e) - c=circle(0, 0, 1, 'red') + c = circle(0, 0, 1, 'red') g.addElement(c) - pd=pathdata(0, -10) + pd = pathdata(0, -10) for i in range(6): pd.relsmbezier(10, 5, 0, 10) pd.relsmbezier(-10, 5, 0, 10) - an=animateMotion(pd, 10) - an.attributes['rotate']='auto-reverse' - an.attributes['repeatCount']="indefinite" + an = animateMotion(pd, 10) + an.attributes['rotate'] = 'auto-reverse' + an.attributes['repeatCount'] = "indefinite" g.addElement(an) s.addElement(g) for i in range(20, 120, 20): - u=use('#animations', i, 0) + u = use('#animations', i, 0) s.addElement(u) for i in range(0, 120, 20): for j in range(5, 105, 10): - c=circle(i, j, 1, 'red', 'black', .5) + c = circle(i, j, 1, 'red', 'black', .5) s.addElement(c) d.setSVG(s) diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index 4144ae00..07c5a318 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -2,7 +2,8 @@ from redis import Redis import simplejson as json -class TempData(object): + +class TempData: def __init__(self, temp_uuid): self.temp_uuid = temp_uuid @@ -11,7 +12,7 @@ class TempData(object): def store(self, field, value): self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*15) # Expire in 15 minutes + self.redis.expire(self.key, 60 * 15) # Expire in 15 minutes def get_all(self): return self.redis.hgetall(self.key) diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 65df59c3..e28abb48 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -9,16 +9,18 @@ from wqflask import app # Use the standard logger here to avoid a circular dependency import logging -logger = logging.getLogger(__name__ ) +logger = logging.getLogger(__name__) OVERRIDES = {} + def app_set(command_id, value): """Set application wide value""" app.config.setdefault(command_id, value) return value -def get_setting(command_id,guess=None): + +def get_setting(command_id, guess=None): """Resolve a setting from the environment or the global settings in app.config, with valid_path is a function checking whether the path points to an expected directory and returns the full path to @@ -54,7 +56,7 @@ def get_setting(command_id,guess=None): # print("Looking for "+command_id+"\n") command = value(os.environ.get(command_id)) if command is None or command == "": - command = OVERRIDES.get(command_id) # currently not in use + command = OVERRIDES.get(command_id) # currently not in use if command is None: # ---- Check whether setting exists in app command = value(app.config.get(command_id)) @@ -62,16 +64,19 @@ def get_setting(command_id,guess=None): command = value(guess) if command is None or command == "": # print command - raise Exception(command_id+' setting unknown or faulty (update default_settings.py?).') + raise Exception( + command_id + ' setting unknown or faulty (update default_settings.py?).') # print("Set "+command_id+"="+str(command)) return command + def get_setting_bool(id): v = get_setting(id) if v not in [0, False, 'False', 'FALSE', None]: - return True + return True return False + def get_setting_int(id): v = get_setting(id) if isinstance(v, str): @@ -80,69 +85,83 @@ def get_setting_int(id): return 0 return v + def valid_bin(bin): if os.path.islink(bin) or valid_file(bin): return bin return None + def valid_file(fn): if os.path.isfile(fn): return fn return None + def valid_path(dir): if os.path.isdir(dir): return dir return None + def js_path(module=None): """ Find the JS module in the two paths """ - try_gn = get_setting("JS_GN_PATH")+"/"+module + try_gn = get_setting("JS_GN_PATH") + "/" + module if valid_path(try_gn): return try_gn - try_guix = get_setting("JS_GUIX_PATH")+"/"+module + try_guix = get_setting("JS_GUIX_PATH") + "/" + module if valid_path(try_guix): return try_guix - raise "No JS path found for "+module+" (if not in Guix check JS_GN_PATH)" + raise "No JS path found for " + module + \ + " (if not in Guix check JS_GN_PATH)" + def reaper_command(guess=None): return get_setting("REAPER_COMMAND", guess) + def gemma_command(guess=None): return assert_bin(get_setting("GEMMA_COMMAND", guess)) + def gemma_wrapper_command(guess=None): return assert_bin(get_setting("GEMMA_WRAPPER_COMMAND", guess)) + def plink_command(guess=None): return assert_bin(get_setting("PLINK_COMMAND", guess)) + def flat_file_exists(subdir): base = get_setting("GENENETWORK_FILES") - return valid_path(base+"/"+subdir) + return valid_path(base + "/" + subdir) + def flat_files(subdir=None): base = get_setting("GENENETWORK_FILES") if subdir: - return assert_dir(base+"/"+subdir) + return assert_dir(base + "/" + subdir) return assert_dir(base) + def assert_bin(fn): if not valid_bin(fn): - raise Exception("ERROR: can not find binary "+fn) + raise Exception("ERROR: can not find binary " + fn) return fn + def assert_dir(dir): if not valid_path(dir): - raise Exception("ERROR: can not find directory "+dir) + raise Exception("ERROR: can not find directory " + dir) return dir + def assert_writable_dir(dir): try: fn = dir + "/test.txt" - fh = open( fn, 'w' ) + fh = open(fn, 'w') fh.write("I am writing this text to the file\n") fh.close() os.remove(fn) @@ -150,16 +169,19 @@ def assert_writable_dir(dir): raise Exception('Unable to write test.txt to directory ' + dir) return dir + def assert_file(fn): if not valid_file(fn): - raise Exception('Unable to find file '+fn) + raise Exception('Unable to find file ' + fn) return fn + def mk_dir(dir): if not valid_path(dir): os.makedirs(dir) return assert_dir(dir) + def locate(name, subdir=None): """ Locate a static flat file in the GENENETWORK_FILES environment. @@ -168,19 +190,22 @@ def locate(name, subdir=None): """ base = get_setting("GENENETWORK_FILES") if subdir: - base = base+"/"+subdir + base = base + "/" + subdir if valid_path(base): lookfor = base + "/" + name if valid_file(lookfor): - logger.info("Found: file "+lookfor+"\n") + logger.info("Found: file " + lookfor + "\n") return lookfor else: - raise Exception("Can not locate "+lookfor) - if subdir: sys.stderr.write(subdir) - raise Exception("Can not locate "+name+" in "+base) + raise Exception("Can not locate " + lookfor) + if subdir: + sys.stderr.write(subdir) + raise Exception("Can not locate " + name + " in " + base) + def locate_phewas(name, subdir=None): - return locate(name, '/phewas/'+subdir) + return locate(name, '/phewas/' + subdir) + def locate_ignore_error(name, subdir=None): """ @@ -191,35 +216,38 @@ def locate_ignore_error(name, subdir=None): """ base = get_setting("GENENETWORK_FILES") if subdir: - base = base+"/"+subdir + base = base + "/" + subdir if valid_path(base): lookfor = base + "/" + name if valid_file(lookfor): - logger.debug("Found: file "+name+"\n") + logger.debug("Found: file " + name + "\n") return lookfor - logger.info("WARNING: file "+name+" not found\n") + logger.info("WARNING: file " + name + " not found\n") return None + def tempdir(): """ Get UNIX TMPDIR by default """ return valid_path(get_setting("TMPDIR", "/tmp")) -BLUE = '\033[94m' + +BLUE = '\033[94m' GREEN = '\033[92m' -BOLD = '\033[1m' -ENDC = '\033[0m' +BOLD = '\033[1m' +ENDC = '\033[0m' + def show_settings(): from utility.tools import LOG_LEVEL - print(("Set global log level to "+BLUE+LOG_LEVEL+ENDC)) + print(("Set global log level to " + BLUE + LOG_LEVEL + ENDC)) log_level = getattr(logging, LOG_LEVEL.upper()) logging.basicConfig(level=log_level) logger.info(OVERRIDES) - logger.info(BLUE+"Mr. Mojo Risin 2"+ENDC) + logger.info(BLUE + "Mr. Mojo Risin 2" + ENDC) keylist = list(app.config.keys()) print("runserver.py: ****** Webserver configuration - k,v pairs from app.config ******") keylist.sort() @@ -231,35 +259,35 @@ def show_settings(): # Cached values -GN_VERSION = get_setting('GN_VERSION') -HOME = get_setting('HOME') -SERVER_PORT = get_setting('SERVER_PORT') -WEBSERVER_MODE = get_setting('WEBSERVER_MODE') -GN2_BASE_URL = get_setting('GN2_BASE_URL') -GN2_BRANCH_URL = get_setting('GN2_BRANCH_URL') -GN_SERVER_URL = get_setting('GN_SERVER_URL') -SERVER_PORT = get_setting_int('SERVER_PORT') -SQL_URI = get_setting('SQL_URI') -LOG_LEVEL = get_setting('LOG_LEVEL') -LOG_LEVEL_DEBUG = get_setting_int('LOG_LEVEL_DEBUG') -LOG_SQL = get_setting_bool('LOG_SQL') -LOG_SQL_ALCHEMY = get_setting_bool('LOG_SQL_ALCHEMY') -LOG_BENCH = get_setting_bool('LOG_BENCH') -LOG_FORMAT = "%(message)s" # not yet in use -USE_REDIS = get_setting_bool('USE_REDIS') -USE_GN_SERVER = get_setting_bool('USE_GN_SERVER') - -GENENETWORK_FILES = get_setting('GENENETWORK_FILES') -JS_GUIX_PATH = get_setting('JS_GUIX_PATH') +GN_VERSION = get_setting('GN_VERSION') +HOME = get_setting('HOME') +SERVER_PORT = get_setting('SERVER_PORT') +WEBSERVER_MODE = get_setting('WEBSERVER_MODE') +GN2_BASE_URL = get_setting('GN2_BASE_URL') +GN2_BRANCH_URL = get_setting('GN2_BRANCH_URL') +GN_SERVER_URL = get_setting('GN_SERVER_URL') +SERVER_PORT = get_setting_int('SERVER_PORT') +SQL_URI = get_setting('SQL_URI') +LOG_LEVEL = get_setting('LOG_LEVEL') +LOG_LEVEL_DEBUG = get_setting_int('LOG_LEVEL_DEBUG') +LOG_SQL = get_setting_bool('LOG_SQL') +LOG_SQL_ALCHEMY = get_setting_bool('LOG_SQL_ALCHEMY') +LOG_BENCH = get_setting_bool('LOG_BENCH') +LOG_FORMAT = "%(message)s" # not yet in use +USE_REDIS = get_setting_bool('USE_REDIS') +USE_GN_SERVER = get_setting_bool('USE_GN_SERVER') + +GENENETWORK_FILES = get_setting('GENENETWORK_FILES') +JS_GUIX_PATH = get_setting('JS_GUIX_PATH') assert_dir(JS_GUIX_PATH) -JS_GN_PATH = get_setting('JS_GN_PATH') +JS_GN_PATH = get_setting('JS_GN_PATH') # assert_dir(JS_GN_PATH) GITHUB_CLIENT_ID = get_setting('GITHUB_CLIENT_ID') GITHUB_CLIENT_SECRET = get_setting('GITHUB_CLIENT_SECRET') if GITHUB_CLIENT_ID != 'UNKNOWN' and GITHUB_CLIENT_SECRET: GITHUB_AUTH_URL = "https://github.com/login/oauth/authorize?client_id=" + \ - GITHUB_CLIENT_ID+"&client_secret="+GITHUB_CLIENT_SECRET + GITHUB_CLIENT_ID + "&client_secret=" + GITHUB_CLIENT_SECRET GITHUB_API_URL = get_setting('GITHUB_API_URL') ORCID_CLIENT_ID = get_setting('ORCID_CLIENT_ID') @@ -267,7 +295,8 @@ ORCID_CLIENT_SECRET = get_setting('ORCID_CLIENT_SECRET') ORCID_AUTH_URL = None if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET: ORCID_AUTH_URL = "https://orcid.org/oauth/authorize?response_type=code&scope=/authenticate&show_login=true&client_id=" + \ - ORCID_CLIENT_ID+"&client_secret="+ORCID_CLIENT_SECRET + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" + ORCID_CLIENT_ID + "&client_secret=" + ORCID_CLIENT_SECRET + \ + "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL') ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST') @@ -279,28 +308,29 @@ SMTP_CONNECT = get_setting('SMTP_CONNECT') SMTP_USERNAME = get_setting('SMTP_USERNAME') SMTP_PASSWORD = get_setting('SMTP_PASSWORD') -REAPER_COMMAND = app_set("REAPER_COMMAND", reaper_command()) -GEMMA_COMMAND = app_set("GEMMA_COMMAND", gemma_command()) +REAPER_COMMAND = app_set("REAPER_COMMAND", reaper_command()) +GEMMA_COMMAND = app_set("GEMMA_COMMAND", gemma_command()) assert(GEMMA_COMMAND is not None) -PLINK_COMMAND = app_set("PLINK_COMMAND", plink_command()) +PLINK_COMMAND = app_set("PLINK_COMMAND", plink_command()) GEMMA_WRAPPER_COMMAND = gemma_wrapper_command() -TEMPDIR = tempdir() # defaults to UNIX TMPDIR +TEMPDIR = tempdir() # defaults to UNIX TMPDIR assert_dir(TEMPDIR) # ---- Handle specific JS modules JS_GUIX_PATH = get_setting("JS_GUIX_PATH") assert_dir(JS_GUIX_PATH) -assert_dir(JS_GUIX_PATH+'/cytoscape-panzoom') +assert_dir(JS_GUIX_PATH + '/cytoscape-panzoom') CSS_PATH = JS_GUIX_PATH # The CSS is bundled together with the JS # assert_dir(JS_PATH) -JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) +JS_TWITTER_POST_FETCHER_PATH = get_setting( + "JS_TWITTER_POST_FETCHER_PATH", js_path("javascript-twitter-post-fetcher")) assert_dir(JS_TWITTER_POST_FETCHER_PATH) -assert_file(JS_TWITTER_POST_FETCHER_PATH+"/js/twitterFetcher_min.js") +assert_file(JS_TWITTER_POST_FETCHER_PATH + "/js/twitterFetcher_min.js") JS_CYTOSCAPE_PATH = get_setting("JS_CYTOSCAPE_PATH", js_path("cytoscape")) assert_dir(JS_CYTOSCAPE_PATH) -assert_file(JS_CYTOSCAPE_PATH+'/cytoscape.min.js') +assert_file(JS_CYTOSCAPE_PATH + '/cytoscape.min.js') # assert_file(PHEWAS_FILES+"/auwerx/PheWAS_pval_EMMA_norm.RData") diff --git a/wqflask/utility/type_checking.py b/wqflask/utility/type_checking.py index 6b029317..00f14ba9 100644 --- a/wqflask/utility/type_checking.py +++ b/wqflask/utility/type_checking.py @@ -7,6 +7,7 @@ def is_float(value): except: return False + def is_int(value): try: int(value) @@ -14,6 +15,7 @@ def is_int(value): except: return False + def is_str(value): if value is None: return False @@ -23,19 +25,22 @@ def is_str(value): except: return False -def get_float(vars_obj,name,default=None): + +def get_float(vars_obj, name, default=None): if name in vars_obj: if is_float(vars_obj[name]): return float(vars_obj[name]) return default -def get_int(vars_obj,name,default=None): + +def get_int(vars_obj, name, default=None): if name in vars_obj: if is_int(vars_obj[name]): return float(vars_obj[name]) return default -def get_string(vars_obj,name,default=None): + +def get_string(vars_obj, name, default=None): if name in vars_obj: if not vars_obj[name] is None: return str(vars_obj[name]) diff --git a/wqflask/utility/webqtlUtil.py b/wqflask/utility/webqtlUtil.py index 5681fadf..0cb71567 100644 --- a/wqflask/utility/webqtlUtil.py +++ b/wqflask/utility/webqtlUtil.py @@ -33,44 +33,46 @@ from math import * from base import webqtlConfig # NL, 07/27/2010. moved from webqtlForm.py -#Dict of Parents and F1 information, In the order of [F1, Mat, Pat] -ParInfo ={ -'BXH':['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'], -'AKXD':['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'], -'BXD':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'C57BL-6JxC57BL-6NJF2':['', '', 'C57BL/6J', 'C57BL/6NJ'], -'BXD300':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'B6BTBRF2':['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'], -'BHHBF2':['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], -'BHF2':['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], -'B6D2F2':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'BDF2-1999':['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'], -'BDF2-2005':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], -'CTB6F2':['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'], -'CXB':['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'], -'AXBXA':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], -'AXB':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], -'BXA':['BAF1', 'ABF1', 'C57BL/6J', 'A/J'], -'LXS':['LSF1', 'SLF1', 'ISS', 'ILS'], -'HXBBXH':['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'], -'BayXSha':['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'], -'ColXBur':['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'], -'ColXCvi':['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'], -'SXM':['SMF1', 'MSF1', 'Steptoe', 'Morex'], -'HRDP':['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'] +# Dict of Parents and F1 information, In the order of [F1, Mat, Pat] +ParInfo = { + 'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'], + 'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'], + 'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'], + 'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'], + 'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], + 'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'], + 'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'], + 'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'], + 'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'], + 'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'], + 'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], + 'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'], + 'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'], + 'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'], + 'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'], + 'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'], + 'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'], + 'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'], + 'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'], + 'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'] } ######################################### # Accessory Functions ######################################### -def genRandStr(prefix = "", length=8, chars=string.ascii_letters+string.digits): + +def genRandStr(prefix="", length=8, chars=string.ascii_letters + string.digits): from random import choice _str = prefix[:] for i in range(length): _str += choice(chars) return _str + def ListNotNull(lst): '''Obsolete - Use built in function any (or all or whatever) @@ -83,14 +85,16 @@ def ListNotNull(lst): return 1 return None -def readLineCSV(line): ### dcrowell July 2008 + +def readLineCSV(line): # dcrowell July 2008 """Parses a CSV string of text and returns a list containing each element as a string. Used by correlationPage""" returnList = line.split('","') - returnList[-1]=returnList[-1][:-2] - returnList[0]=returnList[0][1:] + returnList[-1] = returnList[-1][:-2] + returnList[0] = returnList[0][1:] return returnList + def cmpEigenValue(A, B): try: if A[0] > B[0]: @@ -102,12 +106,13 @@ def cmpEigenValue(A, B): except: return 0 + def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users): access_to_confidential_phenotype_trait = 0 if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: access_to_confidential_phenotype_trait = 1 else: - AuthorizedUsersList=[x.strip() for x in authorized_users.split(',')] + AuthorizedUsersList = [x.strip() for x in authorized_users.split(',')] if userName in AuthorizedUsersList: access_to_confidential_phenotype_trait = 1 return access_to_confidential_phenotype_trait diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py index 0564cfa7..2e94dd01 100644 --- a/wqflask/wqflask/__init__.py +++ b/wqflask/wqflask/__init__.py @@ -5,19 +5,41 @@ import jinja2 from flask import g from flask import Flask +from typing import Tuple +from urllib.parse import urlparse from utility import formatting from wqflask.markdown_routes import glossary_blueprint -from wqflask.markdown_routes import references_blueprint -from wqflask.markdown_routes import links_blueprint +from wqflask.markdown_routes import references_blueprint +from wqflask.markdown_routes import links_blueprint from wqflask.markdown_routes import policies_blueprint -from wqflask.markdown_routes import environments_blueprint -from wqflask.markdown_routes import facilities_blueprint +from wqflask.markdown_routes import environments_blueprint +from wqflask.markdown_routes import facilities_blueprint +from wqflask.markdown_routes import blogs_blueprint app = Flask(__name__) + +# Helper function for getting the SQL objects +def parse_db_url(sql_uri: str) -> Tuple: + """Parse SQL_URI env variable from an sql URI + e.g. 'mysql://user:pass@host_name/db_name' + + """ + parsed_db = urlparse(sql_uri) + return (parsed_db.hostname, parsed_db.username, + parsed_db.password, parsed_db.path[1:]) + + # See http://flask.pocoo.org/docs/config/#configuring-from-files # Note no longer use the badly named WQFLASK_OVERRIDES (nyi) app.config.from_envvar('GN2_SETTINGS') + +DB_HOST, DB_USER, DB_PASS, DB_NAME = parse_db_url(app.config.get('SQL_URI')) +app.config["DB_HOST"] = DB_HOST +app.config["DB_USER"] = DB_USER +app.config["DB_PASS"] = DB_PASS +app.config["DB_NAME"] = DB_NAME + app.jinja_env.globals.update( undefined=jinja2.StrictUndefined, numify=formatting.numify) @@ -29,6 +51,8 @@ app.register_blueprint(links_blueprint, url_prefix="/links") app.register_blueprint(policies_blueprint, url_prefix="/policies") app.register_blueprint(environments_blueprint, url_prefix="/environments") app.register_blueprint(facilities_blueprint, url_prefix="/facilities") +app.register_blueprint(blogs_blueprint, url_prefix="/blogs") + @app.before_request def before_request(): @@ -49,4 +73,4 @@ from wqflask import db_info from wqflask import user_login from wqflask import user_session -import wqflask.views +import wqflask.views diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index f5b50dcd..870f3275 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -16,21 +16,25 @@ from utility import webqtlUtil, helper_functions, corr_result_helpers from utility.benchmark import Bench import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_correlation(start_vars): assert('db' in start_vars) assert('target_db' in start_vars) assert('trait_id' in start_vars) - this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) - this_trait = create_trait(dataset = this_dataset, name = start_vars['trait_id']) + this_dataset = data_set.create_dataset(dataset_name=start_vars['db']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['target_db']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, this_dataset) corr_params = init_corr_params(start_vars) - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + corr_results = calculate_results( + this_trait, this_dataset, target_dataset, corr_params) #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) final_results = [] @@ -38,26 +42,26 @@ def do_correlation(start_vars): if corr_params['type'] == "tissue": [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p, - "symbol" : symbol + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p, + "symbol": symbol } elif corr_params['type'] == "literature" or corr_params['type'] == "lit": [gene_id, sample_r] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "gene_id" : gene_id + "trait": trait, + "sample_r": sample_r, + "gene_id": gene_id } else: [sample_r, sample_p, num_overlap] = corr_results[trait] result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p + "trait": trait, + "sample_r": sample_r, + "#_strains": num_overlap, + "p_value": sample_p } final_results.append(result_dict) @@ -66,6 +70,7 @@ def do_correlation(start_vars): return final_results + def calculate_results(this_trait, this_dataset, target_dataset, corr_params): corr_results = {} @@ -73,52 +78,66 @@ def calculate_results(this_trait, this_dataset, target_dataset, corr_params): if corr_params['type'] == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + corr_results = do_tissue_correlation_for_all_traits( + this_trait, trait_symbol_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + # ZS: Just so a user can use either "lit" or "literature" + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + corr_results = do_literature_correlation_for_all_traits( + this_trait, this_dataset, trait_geneid_dict, corr_params) sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) else: for target_trait, target_vals in list(target_dataset.trait_data.items()): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + result = get_sample_r_and_p_values( + this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) if result is not None: corr_results[target_trait] = result - sorted_results = collections.OrderedDict(sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) + sorted_results = collections.OrderedDict( + sorted(list(corr_results.items()), key=lambda t: -abs(t[1][0]))) return sorted_results + def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + # Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=list(trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) tissue_corr_data = {} for trait, symbol in list(trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, this_trait_tissue_values, corr_params['method']) - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + tissue_corr_data[trait] = [ + result[0], result[1], result[2], symbol] return tissue_corr_data + def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + input_trait_mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(trait_geneid_dict.items()): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + mouse_gene_id = convert_to_mouse_gene_id( + target_dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: result = g.db.execute( @@ -145,6 +164,7 @@ def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_g return lit_corr_data + def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): """ Calculates the sample r (or rho) and p-value @@ -163,12 +183,15 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data this_trait_vals.append(this_sample_value) shared_target_vals.append(target_sample_value) - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, shared_target_vals) if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + this_trait_vals, shared_target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, shared_target_vals) if num_overlap > 5: if scipy.isnan(sample_r): @@ -176,6 +199,7 @@ def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_data else: return [sample_r, sample_p, num_overlap] + def convert_to_mouse_gene_id(species=None, gene_id=None): """If the species is rat or human, translate the gene_id to the mouse geneid @@ -212,6 +236,7 @@ def convert_to_mouse_gene_id(species=None, gene_id=None): return mouse_gene_id + def init_corr_params(start_vars): method = "pearson" if 'method' in start_vars: @@ -227,9 +252,9 @@ def init_corr_params(start_vars): return_count = int(start_vars['return_count']) corr_params = { - 'method' : method, - 'type' : type, - 'return_count' : return_count + 'method': method, + 'type': type, + 'return_count': return_count } return corr_params diff --git a/wqflask/wqflask/api/gen_menu.py b/wqflask/wqflask/api/gen_menu.py index 18afc5ad..a699a484 100644 --- a/wqflask/wqflask/api/gen_menu.py +++ b/wqflask/wqflask/api/gen_menu.py @@ -1,71 +1,53 @@ -from flask import g - - -def gen_dropdown_json(): +from gn3.db.species import get_all_species +def gen_dropdown_json(conn): """Generates and outputs (as json file) the data for the main dropdown menus on the home page """ - - species = get_species() - groups = get_groups(species) - types = get_types(groups) - datasets = get_datasets(types) - - data = dict(species=species, + species = get_all_species(conn) + groups = get_groups(species, conn) + types = get_types(groups, conn) + datasets = get_datasets(types, conn) + return dict(species=species, groups=groups, types=types, datasets=datasets) - return data - - -def get_species(): - """Build species list""" - results = g.db.execute( - "SELECT Name, MenuName FROM Species ORDER BY OrderId").fetchall() - species = [] - for result in results: - species.append([str(result[0]), str(result[1])]) - - return species - - -def get_groups(species): +def get_groups(species, conn): """Build groups list""" groups = {} - for species_name, _species_full_name in species: - groups[species_name] = [] - - results = g.db.execute( - ("SELECT InbredSet.Name, InbredSet.FullName, " - "IFNULL(InbredSet.Family, 'None') " - "FROM InbredSet, Species WHERE Species.Name = '{}' " - "AND InbredSet.SpeciesId = Species.Id GROUP by InbredSet.Name " - "ORDER BY IFNULL(InbredSet.FamilyOrder, InbredSet.FullName) " - "ASC, IFNULL(InbredSet.Family, InbredSet.FullName) ASC, " - "InbredSet.FullName ASC, InbredSet.MenuOrderId ASC") - .format(species_name)).fetchall() - - for result in results: - family_name = "Family:" + str(result[2]) - groups[species_name].append( - [str(result[0]), str(result[1]), family_name]) - + with conn.cursor() as cursor: + for species_name, _species_full_name in species: + groups[species_name] = [] + cursor.execute( + ("SELECT InbredSet.Name, InbredSet.FullName, " + "IFNULL(InbredSet.Family, 'None') " + "FROM InbredSet, Species WHERE Species.Name = '{}' " + "AND InbredSet.SpeciesId = Species.Id GROUP by " + "InbredSet.Name ORDER BY IFNULL(InbredSet.FamilyOrder, " + "InbredSet.FullName) ASC, IFNULL(InbredSet.Family, " + "InbredSet.FullName) ASC, InbredSet.FullName ASC, " + "InbredSet.MenuOrderId ASC") + .format(species_name)) + results = cursor.fetchall() + for result in results: + family_name = "Family:" + str(result[2]) + groups[species_name].append( + [str(result[0]), str(result[1]), family_name]) return groups -def get_types(groups): +def get_types(groups, conn): """Build types list""" types = {} for species, group_dict in list(groups.items()): types[species] = {} for group_name, _group_full_name, _family_name in group_dict: - if phenotypes_exist(group_name): + if phenotypes_exist(group_name, conn): types[species][group_name] = [ ("Phenotypes", "Traits and Cofactors", "Phenotypes")] - if genotypes_exist(group_name): + if genotypes_exist(group_name, conn): if group_name in types[species]: types[species][group_name] += [ ("Genotypes", "DNA Markers and SNPs", "Genotypes")] @@ -73,11 +55,11 @@ def get_types(groups): types[species][group_name] = [ ("Genotypes", "DNA Markers and SNPs", "Genotypes")] if group_name in types[species]: - types_list = build_types(species, group_name) + types_list = build_types(species, group_name, conn) if len(types_list) > 0: types[species][group_name] += types_list else: - types_list = build_types(species, group_name) + types_list = build_types(species, group_name, conn) if len(types_list) > 0: types[species][group_name] = types_list else: @@ -88,22 +70,28 @@ def get_types(groups): return types -def phenotypes_exist(group_name): - results = g.db.execute( - ("SELECT Name FROM PublishFreeze " - "WHERE PublishFreeze.Name = " - "'{}'").format(group_name+"Publish")).fetchone() +def phenotypes_exist(group_name, conn): + results = [] + with conn.cursor() as cursor: + cursor.execute( + ("SELECT Name FROM PublishFreeze " + "WHERE PublishFreeze.Name = " + "'{}'").format(group_name + "Publish")) + results = cursor.fetchone() return bool(results) -def genotypes_exist(group_name): - results = g.db.execute( - ("SELECT Name FROM GenoFreeze " + - "WHERE GenoFreeze.Name = '{}'").format(group_name+"Geno")).fetchone() - return bool(results) +def genotypes_exist(group_name, conn): + with conn.cursor() as cursor: + cursor.execute( + ("SELECT Name FROM GenoFreeze " + + "WHERE GenoFreeze.Name = '{}'").format( + group_name + "Geno")) + results = cursor.fetchone() + return bool(results) -def build_types(species, group): +def build_types(species, group, conn): """Fetches tissues Gets the tissues with data for this species/group @@ -121,17 +109,19 @@ def build_types(species, group): "ORDER BY Tissue.Name").format(species, group) results = [] - for result in g.db.execute(query).fetchall(): - if bool(result): - these_datasets = build_datasets(species, group, result[0]) - if len(these_datasets) > 0: - results.append([str(result[0]), str(result[0]), - "Molecular Traits"]) - + with conn.cursor() as cursor: + cursor.execute(query) + for result in cursor.fetchall(): + if bool(result): + these_datasets = build_datasets(species, + group, result[0], conn) + if len(these_datasets) > 0: + results.append([str(result[0]), str(result[0]), + "Molecular Traits"]) return results -def get_datasets(types): +def get_datasets(types, conn): """Build datasets list""" datasets = {} for species, group_dict in list(types.items()): @@ -139,83 +129,89 @@ def get_datasets(types): for group, type_list in list(group_dict.items()): datasets[species][group] = {} for type_name in type_list: - these_datasets = build_datasets(species, group, type_name[0]) + these_datasets = build_datasets(species, group, + type_name[0], conn) if bool(these_datasets): datasets[species][group][type_name[0]] = these_datasets return datasets -def build_datasets(species, group, type_name): +def build_datasets(species, group, type_name, conn): """Gets dataset names from database""" dataset_text = dataset_value = None datasets = [] - if type_name == "Phenotypes": - results = g.db.execute( - ("SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " - "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " - "InbredSet WHERE InbredSet.Name = '{}' AND " - "PublishFreeze.InbredSetId = InbredSet.Id AND " - "InfoFiles.InfoPageName = PublishFreeze.Name " - "ORDER BY PublishFreeze.CreateTime ASC").format(group)).fetchall() - if bool(results): - for result in results: - dataset_id = str(result[0]) - dataset_value = str(result[1]) - dataset_text = str(result[2]) - if group == 'MDP': - dataset_text = "Mouse Phenome Database" - + with conn.cursor() as cursor: + if type_name == "Phenotypes": + cursor.execute( + ("SELECT InfoFiles.GN_AccesionId, PublishFreeze.Name, " + "PublishFreeze.FullName FROM InfoFiles, PublishFreeze, " + "InbredSet WHERE InbredSet.Name = '{}' AND " + "PublishFreeze.InbredSetId = InbredSet.Id AND " + "InfoFiles.InfoPageName = PublishFreeze.Name " + "ORDER BY PublishFreeze.CreateTime ASC").format(group)) + results = cursor.fetchall() + if bool(results): + for result in results: + dataset_id = str(result[0]) + dataset_value = str(result[1]) + dataset_text = str(result[2]) + if group == 'MDP': + dataset_text = "Mouse Phenome Database" + + datasets.append([dataset_id, dataset_value, dataset_text]) + else: + cursor.execute( + ("SELECT PublishFreeze.Name, PublishFreeze.FullName " + "FROM PublishFreeze, InbredSet " + "WHERE InbredSet.Name = '{}' AND " + "PublishFreeze.InbredSetId = InbredSet.Id " + "ORDER BY PublishFreeze.CreateTime ASC") + .format(group)) + result = cursor.fetchone() + dataset_id = "None" + dataset_value = str(result[0]) + dataset_text = str(result[1]) datasets.append([dataset_id, dataset_value, dataset_text]) - else: - result = g.db.execute( - ("SELECT PublishFreeze.Name, PublishFreeze.FullName " - "FROM PublishFreeze, InbredSet " - "WHERE InbredSet.Name = '{}' AND " - "PublishFreeze.InbredSetId = InbredSet.Id " - "ORDER BY PublishFreeze.CreateTime ASC") - .format(group)).fetchone() + elif type_name == "Genotypes": + cursor.execute( + ("SELECT InfoFiles.GN_AccesionId " + "FROM InfoFiles, GenoFreeze, InbredSet " + "WHERE InbredSet.Name = '{}' AND " + "GenoFreeze.InbredSetId = InbredSet.Id AND " + "InfoFiles.InfoPageName = GenoFreeze.ShortName " + "ORDER BY GenoFreeze.CreateTime " + "DESC").format(group)) + results = cursor.fetchone() dataset_id = "None" - dataset_value = str(result[0]) - dataset_text = str(result[1]) + if bool(results): + dataset_id = str(results[0]) + + dataset_value = "%sGeno" % group + dataset_text = "%s Genotypes" % group datasets.append([dataset_id, dataset_value, dataset_text]) - elif type_name == "Genotypes": - results = g.db.execute( - ("SELECT InfoFiles.GN_AccesionId " + - "FROM InfoFiles, GenoFreeze, InbredSet " + - "WHERE InbredSet.Name = '{}' AND " + - "GenoFreeze.InbredSetId = InbredSet.Id AND " + - "InfoFiles.InfoPageName = GenoFreeze.ShortName " + - "ORDER BY GenoFreeze.CreateTime DESC").format(group)).fetchone() - - dataset_id = "None" - if bool(results): - dataset_id = str(results[0]) - - dataset_value = "%sGeno" % group - dataset_text = "%s Genotypes" % group - datasets.append([dataset_id, dataset_value, dataset_text]) - - else: # for mRNA expression/ProbeSet - results = g.db.execute( - ("SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " - "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " - "ProbeFreeze, InbredSet, Tissue, Species WHERE " - "Species.Name = '{0}' AND Species.Id = " - "InbredSet.SpeciesId AND InbredSet.Name = '{1}' " - "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " - "AND Tissue.Name = '{2}' AND ProbeFreeze.TissueId = " - "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id " - "AND ProbeSetFreeze.public > 0 " - "ORDER BY -ProbeSetFreeze.OrderList DESC, ProbeSetFreeze.CreateTime DESC").format(species, group, type_name)).fetchall() - - datasets = [] - for dataset_info in results: - this_dataset_info = [] - for info in dataset_info: - this_dataset_info.append(str(info)) - datasets.append(this_dataset_info) + else: # for mRNA expression/ProbeSet + cursor.execute( + ("SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " + "ProbeSetFreeze.FullName FROM ProbeSetFreeze, " + "ProbeFreeze, InbredSet, Tissue, Species WHERE " + "Species.Name = '{0}' AND Species.Id = " + "InbredSet.SpeciesId AND InbredSet.Name = '{1}' " + "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND Tissue.Name = '{2}' AND ProbeFreeze.TissueId = " + "Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id " + "AND ProbeSetFreeze.public > 0 " + "ORDER BY -ProbeSetFreeze.OrderList DESC, " + "ProbeSetFreeze.CreateTime " + "DESC").format(species, group, type_name)) + results = cursor.fetchall() + datasets = [] + for dataset_info in results: + this_dataset_info = [] + for info in dataset_info: + this_dataset_info.append(str(info)) + datasets.append(this_dataset_info) return datasets diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index d59a69df..f8b0d8bd 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -8,15 +8,16 @@ from utility import helper_functions from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def do_mapping_for_api(start_vars): assert('db' in start_vars) assert('trait_id' in start_vars) - dataset = data_set.create_dataset(dataset_name = start_vars['db']) + dataset = data_set.create_dataset(dataset_name=start_vars['db']) dataset.group.get_markers() - this_trait = create_trait(dataset = dataset, name = start_vars['trait_id']) + this_trait = create_trait(dataset=dataset, name=start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, dataset) samples = [] @@ -36,26 +37,32 @@ def do_mapping_for_api(start_vars): mapping_params = initialize_parameters(start_vars, dataset, this_trait) - covariates = "" #ZS: It seems to take an empty string as default. This should probably be changed. + # ZS: It seems to take an empty string as default. This should probably be changed. + covariates = "" if mapping_params['mapping_method'] == "gemma": header_row = ["name", "chr", "Mb", "lod_score", "p_value"] - if mapping_params['use_loco'] == "True": #ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] + # ZS: gemma_mapping returns both results and the filename for LOCO, so need to only grab the former for api + if mapping_params['use_loco'] == "True": + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf'])[0] else: - result_markers = gemma_mapping.run_gemma(this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) + result_markers = gemma_mapping.run_gemma( + this_trait, dataset, samples, vals, covariates, mapping_params['use_loco'], mapping_params['maf']) elif mapping_params['mapping_method'] == "rqtl": header_row = ["name", "chr", "cM", "lod_score"] if mapping_params['num_perm'] > 0: _sperm_output, _suggestive, _significant, result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params[ + 'num_perm'], + mapping_params['do_control'], mapping_params[ + 'control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) else: result_markers = rqtl_mapping.run_rqtl_geno(vals, dataset, mapping_params['rqtl_method'], mapping_params['rqtl_model'], - mapping_params['perm_check'], mapping_params['num_perm'], - mapping_params['do_control'], mapping_params['control_marker'], - mapping_params['manhattan_plot'], mapping_params['pair_scan']) + mapping_params['perm_check'], mapping_params['num_perm'], + mapping_params['do_control'], mapping_params['control_marker'], + mapping_params['manhattan_plot'], mapping_params['pair_scan']) if mapping_params['limit_to']: result_markers = result_markers[:mapping_params['limit_to']] @@ -74,7 +81,6 @@ def do_mapping_for_api(start_vars): return result_markers, None - def initialize_parameters(start_vars, dataset, this_trait): mapping_params = {} @@ -118,7 +124,7 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['maf'] = 0.01 if 'maf' in start_vars: - mapping_params['maf'] = start_vars['maf'] # Minor allele frequency + mapping_params['maf'] = start_vars['maf'] # Minor allele frequency mapping_params['use_loco'] = True if 'use_loco' in start_vars: @@ -135,5 +141,3 @@ def initialize_parameters(start_vars, dataset, this_trait): mapping_params['perm_check'] = False return mapping_params - - diff --git a/wqflask/wqflask/api/router.py b/wqflask/wqflask/api/router.py index 60e163f2..a739e5a9 100644 --- a/wqflask/wqflask/api/router.py +++ b/wqflask/wqflask/api/router.py @@ -6,11 +6,13 @@ import csv import json import datetime import requests +import MySQLdb from zipfile import ZipFile, ZIP_DEFLATED import flask +from flask import current_app from flask import g from flask import request from flask import make_response @@ -23,47 +25,52 @@ from wqflask.api import correlation, mapping, gen_menu from utility.tools import flat_files import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) version = "pre1" + @app.route("/api/v_{}/".format(version)) def hello_world(): - return flask.jsonify({"hello":"world"}) + return flask.jsonify({"hello": "world"}) + @app.route("/api/v_{}/species".format(version)) def get_species_list(): - results = g.db.execute("SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") + results = g.db.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species;") the_species = results.fetchall() species_list = [] for species in the_species: species_dict = { - "Id" : species[0], - "Name" : species[1], - "FullName" : species[2], - "TaxonomyId" : species[3] + "Id": species[0], + "Name": species[1], + "FullName": species[2], + "TaxonomyId": species[3] } species_list.append(species_dict) return flask.jsonify(species_list) + @app.route("/api/v_{}/species/<path:species_name>".format(version)) @app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version)) -def get_species_info(species_name, file_format = "json"): +def get_species_info(species_name, file_format="json"): results = g.db.execute("""SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species WHERE (Name="{0}" OR FullName="{0}" OR SpeciesName="{0}");""".format(species_name)) the_species = results.fetchone() - species_dict = { - "Id" : the_species[0], - "Name" : the_species[1], - "FullName" : the_species[2], - "TaxonomyId" : the_species[3] + species_dict = { + "Id": the_species[0], + "Name": the_species[1], + "FullName": the_species[2], + "TaxonomyId": the_species[3] } - + return flask.jsonify(species_dict) + @app.route("/api/v_{}/groups".format(version)) @app.route("/api/v_{}/groups/<path:species_name>".format(version)) def get_groups_list(species_name=None): @@ -87,14 +94,14 @@ def get_groups_list(species_name=None): groups_list = [] for group in the_groups: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } groups_list.append(group_dict) @@ -102,11 +109,12 @@ def get_groups_list(species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/group/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version)) @app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version)) -def get_group_info(group_name, species_name = None, file_format = "json"): +def get_group_info(group_name, species_name=None, file_format="json"): if species_name: results = g.db.execute("""SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, InbredSet.InbredSetName, InbredSet.Name, InbredSet.FullName, InbredSet.public, @@ -131,20 +139,21 @@ def get_group_info(group_name, species_name = None, file_format = "json"): group = results.fetchone() if group: group_dict = { - "Id" : group[0], - "SpeciesId" : group[1], - "DisplayName" : group[2], - "Name" : group[3], - "FullName" : group[4], - "public" : group[5], - "MappingMethodId" : group[6], - "GeneticType" : group[7] + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] } return flask.jsonify(group_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/datasets/<path:group_name>".format(version)) @app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version)) def get_datasets_for_group(group_name, species_name=None): @@ -179,17 +188,17 @@ def get_datasets_for_group(group_name, species_name=None): datasets_list = [] for dataset in the_datasets: dataset_dict = { - "Id" : dataset[0], - "ProbeFreezeId" : dataset[1], - "AvgID" : dataset[2], - "Short_Abbreviation" : dataset[3], - "Long_Abbreviation" : dataset[4], - "FullName" : dataset[5], - "ShortName" : dataset[6], - "CreateTime" : dataset[7], - "public" : dataset[8], - "confidentiality" : dataset[9], - "DataScale" : dataset[10] + "Id": dataset[0], + "ProbeFreezeId": dataset[1], + "AvgID": dataset[2], + "Short_Abbreviation": dataset[3], + "Long_Abbreviation": dataset[4], + "FullName": dataset[5], + "ShortName": dataset[6], + "CreateTime": dataset[7], + "public": dataset[8], + "confidentiality": dataset[9], + "DataScale": dataset[10] } datasets_list.append(dataset_dict) @@ -197,14 +206,15 @@ def get_datasets_for_group(group_name, species_name=None): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/dataset/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version)) @app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version)) -def get_dataset_info(dataset_name, group_name = None, file_format="json"): - #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets +def get_dataset_info(dataset_name, group_name=None, file_format="json"): + # ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets - datasets_list = [] #ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice + datasets_list = [] # ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice probeset_query = """ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, @@ -235,16 +245,16 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: dataset_dict = { - "dataset_type" : "mRNA expression", - "id" : dataset[0], - "name" : dataset[1], - "full_name" : dataset[2], - "short_name" : dataset[3], - "data_scale" : dataset[4], - "tissue_id" : dataset[5], - "tissue" : dataset[6], - "public" : dataset[7], - "confidential" : dataset[8] + "dataset_type": "mRNA expression", + "id": dataset[0], + "name": dataset[1], + "full_name": dataset[2], + "short_name": dataset[3], + "data_scale": dataset[4], + "tissue_id": dataset[5], + "tissue": dataset[6], + "public": dataset[7], + "confidential": dataset[8] } datasets_list.append(dataset_dict) @@ -272,25 +282,25 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): if dataset: if dataset[5]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[1], - "description" : dataset[2], - "pubmed_id" : dataset[5], - "title" : dataset[6], - "year" : dataset[7] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[1], + "description": dataset[2], + "pubmed_id": dataset[5], + "title": dataset[6], + "year": dataset[7] } elif dataset[4]: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0], - "name" : dataset[3], - "description" : dataset[4] + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[3], + "description": dataset[4] } else: dataset_dict = { - "dataset_type" : "phenotype", - "id" : dataset[0] + "dataset_type": "phenotype", + "id": dataset[0] } datasets_list.append(dataset_dict) @@ -302,10 +312,12 @@ def get_dataset_info(dataset_name, group_name = None, file_format="json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",)) @app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",)) -def fetch_traits(dataset_name, file_format = "json"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def fetch_traits(dataset_name, file_format="json"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if ("ids_only" in request.args) and (len(trait_ids) > 0): if file_format == "json": filename = dataset_name + "_trait_ids.json" @@ -353,7 +365,8 @@ def fetch_traits(dataset_name, file_format = "json"): ProbeSet.Id """ - field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] + field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", + "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] elif data_type == "Geno": query = """ SELECT @@ -370,7 +383,8 @@ def fetch_traits(dataset_name, file_format = "json"): Geno.Id """ - field_list = ["Id", "Name", "Marker_Name", "Chr", "Mb", "Sequence", "Source"] + field_list = ["Id", "Name", "Marker_Name", + "Chr", "Mb", "Sequence", "Source"] else: query = """ SELECT @@ -386,7 +400,8 @@ def fetch_traits(dataset_name, file_format = "json"): PublishXRef.Id """ - field_list = ["Id", "PhenotypeId", "PublicationId", "Locus", "LRS", "Additive", "Sequence"] + field_list = ["Id", "PhenotypeId", "PublicationId", + "Locus", "LRS", "Additive", "Sequence"] if 'limit_to' in request.args: limit_number = request.args['limit_to'] @@ -430,10 +445,12 @@ def fetch_traits(dataset_name, file_format = "json"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version)) -def all_sample_data(dataset_name, file_format = "csv"): - trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(dataset_name, request.args) +def all_sample_data(dataset_name, file_format="csv"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) if len(trait_ids) > 0: sample_list = get_samplelist(dataset_name) @@ -536,9 +553,10 @@ def all_sample_data(dataset_name, file_format = "csv"): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def trait_sample_data(dataset_name, trait_name, file_format = "json"): +def trait_sample_data(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error @@ -563,10 +581,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name": sample[0], - "sample_name_2": sample[1], - "value": sample[2], - "data_id": sample[3], + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3], } if sample[4]: sample_dict["se"] = sample[4] @@ -610,10 +628,10 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): sample_list = [] for sample in sample_data: sample_dict = { - "sample_name" : sample[0], - "sample_name_2" : sample[1], - "value" : sample[2], - "data_id" : sample[3] + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3] } if sample[4]: sample_dict["se"] = sample[4] @@ -623,13 +641,14 @@ def trait_sample_data(dataset_name, trait_name, file_format = "json"): return flask.jsonify(sample_list) else: - return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version)) @app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) -def get_trait_info(dataset_name, trait_name, file_format = "json"): +def get_trait_info(dataset_name, trait_name, file_format="json"): probeset_query = """ SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, @@ -648,26 +667,27 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = probeset_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "name" : trait_info[1], - "symbol" : trait_info[2], - "description" : trait_info[3], - "chr" : trait_info[4], - "mb" : trait_info[5], - "alias" :trait_info[6], - "mean" : trait_info[7], - "se" : trait_info[8], - "locus" : trait_info[9], - "lrs" : trait_info[10], - "p_value" : trait_info[11], - "additive" : trait_info[12] + "id": trait_info[0], + "name": trait_info[1], + "symbol": trait_info[2], + "description": trait_info[3], + "chr": trait_info[4], + "mb": trait_info[5], + "alias": trait_info[6], + "mean": trait_info[7], + "se": trait_info[8], + "locus": trait_info[9], + "lrs": trait_info[10], + "p_value": trait_info[11], + "additive": trait_info[12] } return flask.jsonify(trait_dict) else: - if "Publish" in dataset_name: #ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: dataset_name = dataset_name.replace("Publish", "") - + group_id = get_group_id(dataset_name) pheno_query = """ SELECT @@ -684,25 +704,28 @@ def get_trait_info(dataset_name, trait_name, file_format = "json"): trait_info = pheno_results.fetchone() if trait_info: trait_dict = { - "id" : trait_info[0], - "locus" : trait_info[1], - "lrs" : trait_info[2], - "additive" : trait_info[3] + "id": trait_info[0], + "locus": trait_info[1], + "lrs": trait_info[2], + "additive": trait_info[3] } return flask.jsonify(trait_dict) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/correlation".format(version), methods=("GET",)) def get_corr_results(): results = correlation.do_correlation(request.args) if len(results) > 0: - return flask.jsonify(results) #ZS: I think flask.jsonify expects a dict/list instead of JSON + # ZS: I think flask.jsonify expects a dict/list instead of JSON + return flask.jsonify(results) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/mapping".format(version), methods=("GET",)) def get_mapping_results(): results, format = mapping.do_mapping_for_api(request.args) @@ -726,6 +749,7 @@ def get_mapping_results(): else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>.zip".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>".format(version)) @app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>.zip".format(version)) @@ -754,7 +778,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): output_lines.append(line.split()) i += 1 - csv_writer = csv.writer(si, delimiter = "\t", escapechar = "\\", quoting = csv.QUOTE_NONE) + csv_writer = csv.writer( + si, delimiter="\t", escapechar="\\", quoting=csv.QUOTE_NONE) else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") elif file_format == "rqtl2": @@ -765,18 +790,23 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): filename = group_name if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)): - yaml_file = json.load(open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) + yaml_file = json.load( + open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) yaml_file["geno"] = filename + "_geno.csv" yaml_file["gmap"] = filename + "_gmap.csv" yaml_file["pheno"] = filename + "_pheno.csv" config_file = [filename + ".json", json.dumps(yaml_file)] #config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))] - geno_file = [filename + "_geno.csv", open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] - gmap_file = [filename + "_gmap.csv", open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] + geno_file = [filename + "_geno.csv", + open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] + gmap_file = [filename + "_gmap.csv", + open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] if dataset_name: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) else: - phenotypes = requests.get("http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf: zf.writestr(config_file[0], config_file[1]) @@ -799,10 +829,11 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): for line in genofile: if limit_num and i >= limit_num: break - output_lines.append([line.strip() for line in line.split(",")]) + output_lines.append([line.strip() + for line in line.split(",")]) i += 1 - csv_writer = csv.writer(si, delimiter = ",") + csv_writer = csv.writer(si, delimiter=",") else: return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") @@ -813,27 +844,34 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None): return output + @app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",)) def gen_dropdown_menu(): - results = gen_menu.gen_dropdown_json() + conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"), + user=current_app.config.get("DB_USER"), + passwd=current_app.config.get("DB_PASS"), + host=current_app.config.get("DB_HOST")) + results = gen_menu.gen_dropdown_json(conn) if len(results) > 0: return flask.jsonify(results) else: return return_error(code=500, source=request.url_rule.rule, title="Some error occurred", details="") + def return_error(code, source, title, details): json_ob = {"errors": [ { "status": code, - "source": { "pointer": source }, - "title" : title, + "source": {"pointer": source}, + "title": title, "detail": details } ]} return flask.jsonify(json_ob) + def get_dataset_trait_ids(dataset_name, start_vars): if 'limit_to' in start_vars: @@ -842,8 +880,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): limit_string = "" if "Geno" in dataset_name: - data_type = "Geno" #ZS: Need to pass back the dataset type - query = """ + data_type = "Geno" # ZS: Need to pass back the dataset type + query = """ SELECT GenoXRef.GenoId, Geno.Name, GenoXRef.GenoFreezeId FROM @@ -866,7 +904,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): data_type = "Publish" dataset_name = dataset_name.replace("Publish", "") dataset_id = get_group_id(dataset_name) - + query = """ SELECT PublishXRef.PhenotypeId, PublishXRef.Id, InbredSet.InbredSetCode @@ -881,7 +919,8 @@ def get_dataset_trait_ids(dataset_name, start_vars): results = g.db.execute(query).fetchall() trait_ids = [result[0] for result in results] - trait_names = [str(result[2]) + "_" + str(result[1]) for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] return trait_ids, trait_names, data_type, dataset_id @@ -906,6 +945,7 @@ def get_dataset_trait_ids(dataset_name, start_vars): dataset_id = results[0][2] return trait_ids, trait_names, data_type, dataset_id + def get_samplelist(dataset_name): group_id = get_group_id_from_dataset(dataset_name) @@ -915,13 +955,14 @@ def get_samplelist(dataset_name): WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = {} """.format(group_id) - + results = g.db.execute(query).fetchall() - + samplelist = [result[0] for result in results] return samplelist + def get_group_id_from_dataset(dataset_name): if "Publish" in dataset_name: query = """ @@ -962,6 +1003,7 @@ def get_group_id_from_dataset(dataset_name): else: return None + def get_group_id(group_name): query = """ SELECT InbredSet.Id diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index 0291f2b8..01274ba9 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -35,11 +35,12 @@ def process_traits(unprocessed_traits): data, _separator, the_hmac = trait.rpartition(':') data = data.strip() if g.user_session.logged_in: - assert the_hmac == hmac.hmac_creation(data), "Data tampering?" + assert the_hmac == hmac.hmac_creation(data), "Data tampering?" traits.add(str(data)) return traits + def report_change(len_before, len_now): new_length = len_now - len_before if new_length: @@ -48,16 +49,18 @@ def report_change(len_before, len_now): else: logger.debug("No new traits were added.") + @app.route("/collections/store_trait_list", methods=('POST',)) def store_traits_list(): - params = request.form + params = request.form - traits = params['traits'] - hash = params['hash'] + traits = params['traits'] + hash = params['hash'] - Redis.set(hash, traits) + Redis.set(hash, traits) + + return hash - return hash @app.route("/collections/add") def collections_add(): @@ -68,19 +71,20 @@ def collections_add(): uc_id = g.user_session.add_collection(collection_name, set()) collections = g.user_session.user_collections - #ZS: One of these might be unnecessary + # ZS: One of these might be unnecessary if 'traits' in request.args: - traits=request.args['traits'] + traits = request.args['traits'] return render_template("collections/add.html", - traits = traits, - collections = collections, - ) + traits=traits, + collections=collections, + ) else: hash = request.args['hash'] return render_template("collections/add.html", - hash = hash, - collections = collections, - ) + hash=hash, + collections=collections, + ) + @app.route("/collections/new") def collections_new(): @@ -118,6 +122,7 @@ def collections_new(): # CauseAnError pass + def create_new(collection_name): params = request.args @@ -133,15 +138,17 @@ def create_new(collection_name): return redirect(url_for('view_collection', uc_id=uc_id)) + @app.route("/collections/list") def list_collections(): params = request.args user_collections = list(g.user_session.user_collections) return render_template("collections/list.html", - params = params, - collections = user_collections, - ) + params=params, + collections=user_collections, + ) + @app.route("/collections/remove", methods=('POST',)) def remove_traits(): @@ -151,7 +158,8 @@ def remove_traits(): traits_to_remove = params['trait_list'] traits_to_remove = process_traits(traits_to_remove) - members_now = g.user_session.remove_traits_from_collection(uc_id, traits_to_remove) + members_now = g.user_session.remove_traits_from_collection( + uc_id, traits_to_remove) return redirect(url_for("view_collection", uc_id=uc_id)) @@ -174,7 +182,8 @@ def delete_collection(): else: flash("We've deleted the selected collection.", "alert-info") else: - flash("We've deleted the collection: {}.".format(collection_name), "alert-info") + flash("We've deleted the collection: {}.".format( + collection_name), "alert-info") return redirect(url_for('list_collections')) @@ -184,7 +193,8 @@ def view_collection(): params = request.args uc_id = params['uc_id'] - uc = next((collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) + uc = next( + (collection for collection in g.user_session.user_collections if collection["id"] == uc_id)) traits = uc["members"] trait_obs = [] @@ -196,25 +206,28 @@ def view_collection(): name, dataset_name = atrait.split(':') if dataset_name == "Temp": group = name.split("_")[2] - dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group) + dataset = create_dataset( + dataset_name, dataset_type="Temp", group_name=group) trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) trait_ob = create_trait(name=name, dataset=dataset) - trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) json_version.append(jsonable(trait_ob)) collection_info = dict(trait_obs=trait_obs, - uc = uc) + uc=uc) if "json" in params: return json.dumps(json_version) else: return render_template("collections/view.html", - **collection_info - ) + **collection_info + ) + @app.route("/collections/change_name", methods=('POST',)) def change_collection_name(): @@ -226,4 +239,3 @@ def change_collection_name(): g.user_session.change_collection_name(collection_id, new_name) return new_name - diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index 92de6073..cb88eb53 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -23,7 +23,7 @@ from pprint import pformat as pf from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers -import utility.webqtlUtil #this is for parallel computing only. +import utility.webqtlUtil # this is for parallel computing only. from wqflask.correlation import correlation_functions from MySQLdb import escape_string as escape @@ -31,19 +31,21 @@ from MySQLdb import escape_string as escape from flask import Flask, g -class ComparisonBarChart(object): +class ComparisonBarChart: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.all_sample_list = [] self.traits = [] self.insufficient_shared_samples = False - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: - + if trait_db[1].group.name != this_group: self.insufficient_shared_samples = True break @@ -51,7 +53,7 @@ class ComparisonBarChart(object): this_group = trait_db[1].group.name this_trait = trait_db[0] self.traits.append(this_trait) - + this_sample_data = this_trait.data for sample in this_sample_data: @@ -74,10 +76,10 @@ class ComparisonBarChart(object): this_trait_vals.append('') self.sample_data.append(this_trait_vals) - self.js_data = dict(traits = [trait.name for trait in self.traits], - samples = self.all_sample_list, - sample_data = self.sample_data,) - + self.js_data = dict(traits=[trait.name for trait in self.traits], + samples=self.all_sample_list, + sample_data=self.sample_data,) + def get_trait_db_obs(self, trait_db_list): self.trait_list = [] @@ -88,9 +90,8 @@ class ComparisonBarChart(object): #print("dataset_name:", dataset_name) dataset_ob = data_set.create_dataset(dataset_name) trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) + name=trait_name, + cellid=None) self.trait_list.append((trait_ob, dataset_ob)) #print("trait_list:", self.trait_list) - diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index c87776bb..cafb9265 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -9,24 +9,29 @@ from scipy import stats import numpy as np import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class CorrScatterPlot(object): + +class CorrScatterPlot: """Page that displays a correlation scatterplot with a line fitted to it""" def __init__(self, params): if "Temp" in params['dataset_1']: - self.dataset_1 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_1'].split("_")[1]) + self.dataset_1 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_1'].split("_")[1]) else: self.dataset_1 = data_set.create_dataset(params['dataset_1']) if "Temp" in params['dataset_2']: - self.dataset_2 = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = params['dataset_2'].split("_")[1]) + self.dataset_2 = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=params['dataset_2'].split("_")[1]) else: self.dataset_2 = data_set.create_dataset(params['dataset_2']) #self.dataset_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = create_trait(name=params['trait_1'], dataset=self.dataset_1) - self.trait_2 = create_trait(name=params['trait_2'], dataset=self.dataset_2) + self.trait_1 = create_trait( + name=params['trait_1'], dataset=self.dataset_1) + self.trait_2 = create_trait( + name=params['trait_2'], dataset=self.dataset_2) #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.dataset_3) self.method = params['method'] @@ -37,10 +42,13 @@ class CorrScatterPlot(object): if self.dataset_1.group.f1list != None: primary_samples += self.dataset_1.group.f1list - self.trait_1 = retrieve_sample_data(self.trait_1, self.dataset_1, primary_samples) - self.trait_2 = retrieve_sample_data(self.trait_2, self.dataset_2, primary_samples) + self.trait_1 = retrieve_sample_data( + self.trait_1, self.dataset_1, primary_samples) + self.trait_2 = retrieve_sample_data( + self.trait_2, self.dataset_2, primary_samples) - samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) + samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples( + self.trait_1.data, self.trait_2.data) self.data = [] self.indIDs = list(samples_1.keys()) @@ -53,72 +61,76 @@ class CorrScatterPlot(object): vals_2.append(samples_2[sample].value) self.data.append(vals_2) - slope, intercept, r_value, p_value, std_err = stats.linregress(vals_1, vals_2) + slope, intercept, r_value, p_value, std_err = stats.linregress( + vals_1, vals_2) if slope < 0.001: slope_string = '%.3E' % slope else: slope_string = '%.3f' % slope - - x_buffer = (max(vals_1) - min(vals_1))*0.1 - y_buffer = (max(vals_2) - min(vals_2))*0.1 + + x_buffer = (max(vals_1) - min(vals_1)) * 0.1 + y_buffer = (max(vals_2) - min(vals_2)) * 0.1 x_range = [min(vals_1) - x_buffer, max(vals_1) + x_buffer] y_range = [min(vals_2) - y_buffer, max(vals_2) + y_buffer] - intercept_coords = get_intercept_coords(slope, intercept, x_range, y_range) + intercept_coords = get_intercept_coords( + slope, intercept, x_range, y_range) rx = stats.rankdata(vals_1) ry = stats.rankdata(vals_2) self.rdata = [] self.rdata.append(rx.tolist()) - self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + self.rdata.append(ry.tolist()) + srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress( + rx, ry) if srslope < 0.001: srslope_string = '%.3E' % srslope else: srslope_string = '%.3f' % srslope - x_buffer = (max(rx) - min(rx))*0.1 - y_buffer = (max(ry) - min(ry))*0.1 + x_buffer = (max(rx) - min(rx)) * 0.1 + y_buffer = (max(ry) - min(ry)) * 0.1 sr_range = [min(rx) - x_buffer, max(rx) + x_buffer] - sr_intercept_coords = get_intercept_coords(srslope, srintercept, sr_range, sr_range) + sr_intercept_coords = get_intercept_coords( + srslope, srintercept, sr_range, sr_range) self.collections_exist = "False" if g.user_session.num_collections > 0: self.collections_exist = "True" self.js_data = dict( - data = self.data, - rdata = self.rdata, - indIDs = self.indIDs, - trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name), - trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name), - samples_1 = samples_1, - samples_2 = samples_2, - num_overlap = num_overlap, - vals_1 = vals_1, - vals_2 = vals_2, - x_range = x_range, - y_range = y_range, - sr_range = sr_range, - intercept_coords = intercept_coords, - sr_intercept_coords = sr_intercept_coords, - - slope = slope, - slope_string = slope_string, - intercept = intercept, - r_value = r_value, - p_value = p_value, - - srslope = srslope, - srslope_string = srslope_string, - srintercept = srintercept, - srr_value = srr_value, - srp_value = srp_value + data=self.data, + rdata=self.rdata, + indIDs=self.indIDs, + trait_1=self.trait_1.dataset.name + ": " + str(self.trait_1.name), + trait_2=self.trait_2.dataset.name + ": " + str(self.trait_2.name), + samples_1=samples_1, + samples_2=samples_2, + num_overlap=num_overlap, + vals_1=vals_1, + vals_2=vals_2, + x_range=x_range, + y_range=y_range, + sr_range=sr_range, + intercept_coords=intercept_coords, + sr_intercept_coords=sr_intercept_coords, + + slope=slope, + slope_string=slope_string, + intercept=intercept, + r_value=r_value, + p_value=p_value, + + srslope=srslope, + srslope_string=srslope_string, + srintercept=srintercept, + srr_value=srr_value, + srp_value=srp_value #trait3 = self.trait_3.data, #vals_3 = vals_3 @@ -129,10 +141,10 @@ class CorrScatterPlot(object): def get_intercept_coords(slope, intercept, x_range, y_range): intercept_coords = [] - y1 = slope*x_range[0] + intercept - y2 = slope*x_range[1] + intercept - x1 = (y1-intercept)/slope - x2 = (y2-intercept)/slope + y1 = slope * x_range[0] + intercept + y2 = slope * x_range[1] + intercept + x1 = (y1 - intercept) / slope + x2 = (y2 - intercept) / slope intercept_coords.append([x1, y1]) intercept_coords.append([x2, y2]) diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index fd7691d4..c8b9da0e 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -25,7 +25,6 @@ # Last updated by NL 2011/03/23 import math -import rpy2.robjects import string from base.mrna_assay_tissue_data import MrnaAssayTissueData @@ -34,54 +33,55 @@ from flask import Flask, g ##################################################################################### -#Input: primaryValue(list): one list of expression values of one probeSet, +# Input: primaryValue(list): one list of expression values of one probeSet, # targetValue(list): one list of expression values of one probeSet, # method(string): indicate correlation method ('pearson' or 'spearman') -#Output: corr_result(list): first item is Correlation Value, second item is tissue number, +# Output: corr_result(list): first item is Correlation Value, second item is tissue number, # third item is PValue -#Function: get correlation value,Tissue quantity ,p value result by using R; -#Note : This function is special case since both primaryValue and targetValue are from -#the same dataset. So the length of these two parameters is the same. They are pairs. -#Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on -#the same tissue order +# Function: get correlation value,Tissue quantity ,p value result by using R; +# Note : This function is special case since both primaryValue and targetValue are from +# the same dataset. So the length of these two parameters is the same. They are pairs. +# Also, in the datatable TissueProbeSetData, all Tissue values are loaded based on +# the same tissue order ##################################################################################### -def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pearson'): +def cal_zero_order_corr_for_tiss(primaryValue=[], targetValue=[], method='pearson'): - R_primary = rpy2.robjects.FloatVector(list(range(len(primaryValue)))) N = len(primaryValue) - for i in range(len(primaryValue)): - R_primary[i] = primaryValue[i] + # R_primary = rpy2.robjects.FloatVector(list(range(len(primaryValue)))) + # for i in range(len(primaryValue)): + # R_primary[i] = primaryValue[i] - R_target = rpy2.robjects.FloatVector(list(range(len(targetValue)))) - for i in range(len(targetValue)): - R_target[i]=targetValue[i] + # R_target = rpy2.robjects.FloatVector(list(range(len(targetValue)))) + # for i in range(len(targetValue)): + # R_target[i] = targetValue[i] - R_corr_test = rpy2.robjects.r['cor.test'] - if method =='spearman': - R_result = R_corr_test(R_primary, R_target, method='spearman') - else: - R_result = R_corr_test(R_primary, R_target) + # R_corr_test = rpy2.robjects.r['cor.test'] + # if method == 'spearman': + # R_result = R_corr_test(R_primary, R_target, method='spearman') + # else: + # R_result = R_corr_test(R_primary, R_target) - corr_result =[] - corr_result.append( R_result[3][0]) - corr_result.append( N ) - corr_result.append( R_result[2][0]) + # corr_result = [] + # corr_result.append(R_result[3][0]) + # corr_result.append(N) + # corr_result.append(R_result[2][0]) - return corr_result + return [None, N, None] + # return corr_result ######################################################################################################## -#input: cursor, symbolList (list), dataIdDict(Dict): key is symbol -#output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. +# input: cursor, symbolList (list), dataIdDict(Dict): key is symbol +# output: SymbolValuePairDict(dictionary):one dictionary of Symbol and Value Pair. # key is symbol, value is one list of expression values of one probeSet. -#function: wrapper function for getSymbolValuePairDict function +# function: wrapper function for getSymbolValuePairDict function # build gene symbol list if necessary, cut it into small lists if necessary, # then call getSymbolValuePairDict function and merge the results. ######################################################################################################## def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) - - if len(tissue_data.gene_symbols): - return tissue_data.get_symbol_values_pairs() + if len(tissue_data.gene_symbols) >0: + results = tissue_data.get_symbol_values_pairs() + return results diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py new file mode 100644 index 00000000..6974dbd5 --- /dev/null +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -0,0 +1,263 @@ +"""module that calls the gn3 api's to do the correlation """ +import json + +from wqflask.correlation import correlation_functions + +from base import data_set + +from base.trait import create_trait +from base.trait import retrieve_sample_data + +from gn3.computations.correlations import compute_all_sample_correlation +from gn3.computations.correlations import map_shared_keys_to_values +from gn3.computations.correlations import compute_all_lit_correlation +from gn3.computations.correlations import compute_tissue_correlation +from gn3.db_utils import database_connector + + +def create_target_this_trait(start_vars): + """this function creates the required trait and target dataset for correlation""" + + this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['corr_dataset']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) + sample_data = () + return (this_dataset, this_trait, target_dataset, sample_data) + + +def process_samples(start_vars, sample_names, excluded_samples=None): + """process samples""" + sample_data = {} + if not excluded_samples: + excluded_samples = () + sample_vals_dict = json.loads(start_vars["sample_vals"]) + for sample in sample_names: + if sample not in excluded_samples: + val = sample_vals_dict[sample] + if not val.strip().lower() == "x": + sample_data[str(sample)] = float(val) + return sample_data + + +def merge_correlation_results(correlation_results, target_correlation_results): + + corr_dict = {} + + for trait_dict in target_correlation_results: + for trait_name, values in trait_dict.items(): + + corr_dict[trait_name] = values + for trait_dict in correlation_results: + for trait_name, values in trait_dict.items(): + + if corr_dict.get(trait_name): + + trait_dict[trait_name].update(corr_dict.get(trait_name)) + + return correlation_results + + +def sample_for_trait_lists(corr_results, target_dataset, + this_trait, this_dataset, start_vars): + """interface function for correlation on top results""" + + (this_trait_data, target_dataset) = fetch_sample_data( + start_vars, this_trait, this_dataset, target_dataset) + correlation_results = compute_all_sample_correlation(corr_method="pearson", + this_trait=this_trait_data, + target_dataset=target_dataset) + + return correlation_results + + +def tissue_for_trait_lists(corr_results, this_dataset, this_trait): + """interface function for doing tissue corr_results on trait_list""" + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + traits_symbol_dict = this_dataset.retrieve_genes("Symbol") + traits_symbol_dict = dict({trait_name: symbol for ( + trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) + tissue_input = get_tissue_correlation_input( + this_trait, traits_symbol_dict) + + if tissue_input is not None: + (primary_tissue_data, target_tissue_data) = tissue_input + corr_results = compute_tissue_correlation( + primary_tissue_dict=primary_tissue_data, + target_tissues_data=target_tissue_data, + corr_method="pearson") + return corr_results + + +def lit_for_trait_list(corr_results, this_dataset, this_trait): + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset) + + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + + geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict.items() if + trait_lists.get(trait_name)} + + conn, _cursor_object = database_connector() + + with conn: + + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + return correlation_results + + +def fetch_sample_data(start_vars, this_trait, this_dataset, target_dataset): + + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + this_trait = retrieve_sample_data(this_trait, this_dataset) + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + + return (this_trait_data, results) + + +def compute_correlation(start_vars, method="pearson"): + """compute correlation for to call gn3 api""" + # pylint: disable-msg=too-many-locals + + corr_type = start_vars['corr_type'] + + (this_dataset, this_trait, target_dataset, + sample_data) = create_target_this_trait(start_vars) + + target_dataset_type = target_dataset.type + this_dataset_type = this_dataset.type + + method = start_vars['corr_sample_method'] + corr_return_results = int(start_vars.get("corr_return_results", 100)) + corr_input_data = {} + + if corr_type == "sample": + (this_trait_data, target_dataset_data) = fetch_sample_data( + start_vars, this_trait, this_dataset, target_dataset) + correlation_results = compute_all_sample_correlation(corr_method=method, + this_trait=this_trait_data, + target_dataset=target_dataset_data) + + elif corr_type == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + tissue_input = get_tissue_correlation_input( + this_trait, trait_symbol_dict) + + if tissue_input is not None: + (primary_tissue_data, target_tissue_data) = tissue_input + + corr_input_data = { + "primary_tissue": primary_tissue_data, + "target_tissues_dict": target_tissue_data + } + correlation_results = compute_tissue_correlation( + primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data[ + "target_tissues_dict"], + corr_method=method + + ) + + elif corr_type == "lit": + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset) + + conn, _cursor_object = database_connector() + with conn: + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + correlation_results = correlation_results[0:corr_return_results] + + compute_all = True # later to be passed as argument + + if (compute_all): + + correlation_results = compute_corr_for_top_results(correlation_results, + this_trait, + this_dataset, + target_dataset, + corr_type) + + correlation_data = {"correlation_results": correlation_results, + "this_trait": this_trait.name, + "target_dataset": start_vars['corr_dataset'], + "return_results": corr_return_results} + + return correlation_data + + +def compute_corr_for_top_results(correlation_results, + this_trait, + this_dataset, + target_dataset, + corr_type): + if corr_type != "tissue" and this_dataset.type == "ProbeSet" and target_dataset.type == "ProbeSet": + + tissue_result = tissue_for_trait_lists( + correlation_results, this_dataset, this_trait) + + if tissue_result: + + correlation_results = merge_correlation_results( + correlation_results, tissue_result) + + if corr_type != "lit" and this_dataset.type == "ProbeSet" and target_dataset.type == "ProbeSet": + lit_result = lit_for_trait_list( + correlation_results, this_dataset, this_trait) + + if lit_result: + correlation_results = merge_correlation_results( + correlation_results, lit_result) + + if corr_type != "sample": + pass + + return correlation_results + + +def do_lit_correlation(this_trait, this_dataset): + """function for fetching lit inputs""" + geneid_dict = this_dataset.retrieve_genes("GeneId") + species = this_dataset.group.species.lower() + trait_geneid = this_trait.geneid + return (trait_geneid, geneid_dict, species) + + +def get_tissue_correlation_input(this_trait, trait_symbol_dict): + """Gets tissue expression values for the primary trait and target tissues values""" + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + primary_tissue_data = { + "this_id": this_trait.name, + "tissue_values": primary_trait_tissue_values + + } + target_tissue_data = { + "trait_symbol_dict": trait_symbol_dict, + "symbol_tissue_vals_dict": corr_result_tissue_vals_dict + } + return (primary_tissue_data, target_tissue_data) + return None diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index fb4dc4f4..10e0d626 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -28,8 +28,6 @@ import utility.webqtlUtil from base.trait import create_trait -from rpy2.robjects.packages import importr - from base import data_set from utility import helper_functions from utility import corr_result_helpers @@ -46,7 +44,6 @@ from utility.db_tools import escape from flask import g -utils = importr("utils") logger = utility.logger.getLogger(__name__) METHOD_LIT = "3" @@ -58,7 +55,7 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 -class CorrelationResults(object): +class CorrelationResults: def __init__(self, start_vars): # get trait list from db (database name) # calculate correlation with Base vector and targets @@ -78,11 +75,12 @@ class CorrelationResults(object): with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group']) self.trait_id = start_vars['trait_id'] self.this_trait = create_trait(dataset=self.dataset, - name=self.trait_id, - cellid=None) + name=self.trait_id, + cellid=None) else: helper_functions.get_species_dataset_trait(self, start_vars) @@ -95,9 +93,10 @@ class CorrelationResults(object): self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0) self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0) - if ('loc_chr' in start_vars and - 'min_loc_mb' in start_vars and - 'max_loc_mb' in start_vars): + + if ('loc_chr' in start_vars + and 'min_loc_mb' in start_vars + and 'max_loc_mb' in start_vars): self.location_type = get_string(start_vars, 'location_type') self.location_chr = get_string(start_vars, 'loc_chr') @@ -109,8 +108,8 @@ class CorrelationResults(object): self.get_formatted_corr_type() self.return_number = int(start_vars['corr_return_results']) - #The two if statements below append samples to the sample list based upon whether the user - #rselected Primary Samples Only, Other Samples Only, or All Samples + # The two if statements below append samples to the sample list based upon whether the user + # rselected Primary Samples Only, Other Samples Only, or All Samples primary_samples = self.dataset.group.samplelist if self.dataset.group.parlist != None: @@ -118,23 +117,26 @@ class CorrelationResults(object): if self.dataset.group.f1list != None: primary_samples += self.dataset.group.f1list - #If either BXD/whatever Only or All Samples, append all of that group's samplelist + # If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, primary_samples) - #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and - #exclude the primary samples (because they would have been added in the previous - #if statement if the user selected All Samples) + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( - self.dataset.group.parlist + self.dataset.group.f1list)] - self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples) + self.dataset.group.parlist + self.dataset.group.f1list)] + self.process_samples(start_vars, list( + self.this_trait.data.keys()), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset = data_set.create_dataset( + start_vars['corr_dataset']) self.target_dataset.get_trait_data(list(self.sample_data.keys())) - self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method) + self.header_fields = get_header_fields( + self.target_dataset.type, self.corr_method) if self.target_dataset.type == "ProbeSet": self.filter_cols = [7, 6] @@ -153,7 +155,8 @@ class CorrelationResults(object): tissue_corr_data = self.do_tissue_correlation_for_all_traits() if tissue_corr_data != None: for trait in list(tissue_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) else: for trait, values in list(self.target_dataset.trait_data.items()): self.get_sample_r_and_p_values(trait, values) @@ -163,7 +166,8 @@ class CorrelationResults(object): lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in list(lit_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": for trait, values in list(self.target_dataset.trait_data.items()): @@ -172,8 +176,7 @@ class CorrelationResults(object): self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), key=lambda t: -abs(t[1][0]))) - - #ZS: Convert min/max chromosome to an int for the location range option + # ZS: Convert min/max chromosome to an int for the location range option range_chr_as_int = None for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): if 'loc_chr' in start_vars: @@ -181,7 +184,8 @@ class CorrelationResults(object): range_chr_as_int = order_id for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) if not trait_object: continue @@ -194,8 +198,9 @@ class CorrelationResults(object): if chr_info.name == trait_object.chr: chr_as_int = order_id - if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - float(self.correlation_data[trait][0]) <= self.p_range_upper): + + if (float(self.correlation_data[trait][0]) >= self.p_range_lower + and float(self.correlation_data[trait][0]) <= self.p_range_upper): if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): @@ -215,8 +220,8 @@ class CorrelationResults(object): continue (trait_object.sample_r, - trait_object.sample_p, - trait_object.num_overlap) = self.correlation_data[trait] + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] # Set some sane defaults trait_object.tissue_corr = 0 @@ -236,7 +241,8 @@ class CorrelationResults(object): if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": self.do_tissue_correlation_for_trait_list() - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -259,39 +265,43 @@ class CorrelationResults(object): def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1): """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] + gene_symbol_list = [ + trait.symbol for trait in self.correlation_results if trait.symbol] - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=gene_symbol_list) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(self.trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(self.trait_symbol_dict.values())) #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) @@ -300,27 +310,30 @@ class CorrelationResults(object): tissue_corr_data = {} for trait, symbol in list(self.trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) tissue_corr_data[trait] = [symbol, result[0], result[2]] tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return tissue_corr_data def do_lit_correlation_for_trait_list(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: - trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + trait.mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None @@ -348,13 +361,14 @@ class CorrelationResults(object): else: trait.lit_corr = 0 - def do_lit_correlation_for_all_traits(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): - mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) @@ -382,7 +396,7 @@ class CorrelationResults(object): lit_corr_data[trait] = [gene_id, 0] lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return lit_corr_data @@ -440,21 +454,26 @@ class CorrelationResults(object): self.this_trait_vals.append(sample_value) target_vals.append(target_sample_value) - self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals) + self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + self.this_trait_vals, target_vals) if num_overlap > 5: - #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ + # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ if self.corr_method == 'bicor': - sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals) + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) elif self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + self.this_trait_vals, target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + self.this_trait_vals, target_vals) if numpy.isnan(sample_r): pass else: - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [ + sample_r, sample_p, num_overlap] def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: @@ -475,16 +494,18 @@ def do_bicor(this_trait_vals, target_trait_vals): r_library("WGCNA") r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function - r_options(stringsAsFactors = False) + r_options(stringsAsFactors=False) this_vals = ro.Vector(this_trait_vals) target_vals = ro.Vector(target_trait_vals) - the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] return the_r, the_p -def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False): + +def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): results_list = [] for i, trait in enumerate(corr_results): if trait.view == False: @@ -493,7 +514,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap results_dict['index'] = i + 1 results_dict['trait_id'] = trait.name results_dict['dataset'] = trait.dataset.name - results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name)) + results_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(trait.name, trait.dataset.name)) if target_dataset.type == "ProbeSet": results_dict['symbol'] = trait.symbol results_dict['description'] = "N/A" @@ -544,7 +566,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap if bool(trait.authors): authors_list = trait.authors.split(',') if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al." + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." else: results_dict['authors_display'] = trait.authors if bool(trait.pubmed_id): @@ -574,85 +597,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap return json.dumps(results_list) + def get_header_fields(data_type, corr_method): if data_type == "ProbeSet": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Lit rho', - 'Tissue rho', - 'Tissue p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Lit rho', + 'Tissue rho', + 'Tissue p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample r', - 'N', - 'Sample p(r)', - 'Lit r', - 'Tissue r', - 'Tissue p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample r', + 'N', + 'Sample p(r)', + 'Lit r', + 'Tissue r', + 'Tissue p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif data_type == "Publish": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample r', - 'N', - 'Sample p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample r', + 'N', + 'Sample p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: if corr_method == "spearman": header_fields = ['Index', - 'ID', - 'Location', - 'Sample rho', - 'N', - 'Sample p(rho)'] + 'ID', + 'Location', + 'Sample rho', + 'N', + 'Sample p(rho)'] else: header_fields = ['Index', - 'ID', - 'Location', - 'Sample r', - 'N', - 'Sample p(r)'] + 'ID', + 'Location', + 'Sample r', + 'N', + 'Sample p(r)'] return header_fields - diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index f77761d8..9ac02ac5 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -25,8 +25,6 @@ import string import numpy as np import scipy -import rpy2.robjects as robjects -from rpy2.robjects.packages import importr from base import data_set from base.webqtlConfig import GENERATED_TEXT_DIR @@ -40,11 +38,11 @@ from utility.redis_tools import get_redis_conn Redis = get_redis_conn() THIRTY_DAYS = 60 * 60 * 24 * 30 - -class CorrelationMatrix(object): +class CorrelationMatrix: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) @@ -52,7 +50,8 @@ class CorrelationMatrix(object): self.traits = [] self.insufficient_shared_samples = False self.do_PCA = True - this_group = self.trait_list[0][1].group.name #ZS: Getting initial group name before verifying all traits are in the same group in the following loop + # ZS: Getting initial group name before verifying all traits are in the same group in the following loop + this_group = self.trait_list[0][1].group.name for trait_db in self.trait_list: this_group = trait_db[1].group.name this_trait = trait_db[0] @@ -76,10 +75,12 @@ class CorrelationMatrix(object): this_trait_vals.append('') self.sample_data.append(this_trait_vals) - if len(this_trait_vals) < len(self.trait_list): #Shouldn't do PCA if there are more traits than observations/samples + # Shouldn't do PCA if there are more traits than observations/samples + if len(this_trait_vals) < len(self.trait_list): self.do_PCA = False - self.lowest_overlap = 8 #ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + # ZS: Variable set to the lowest overlapping samples in order to notify user, or 8, whichever is lower (since 8 is when we want to display warning) + self.lowest_overlap = 8 self.corr_results = [] self.pca_corr_results = [] @@ -93,7 +94,7 @@ class CorrelationMatrix(object): corr_result_row = [] pca_corr_result_row = [] - is_spearman = False #ZS: To determine if it's above or below the diagonal + is_spearman = False # ZS: To determine if it's above or below the diagonal for target in self.trait_list: target_trait = target[0] target_db = target[1] @@ -112,7 +113,8 @@ class CorrelationMatrix(object): if sample in self.shared_samples_list: self.shared_samples_list.remove(sample) - this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals) + this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + this_trait_vals, target_vals) if num_overlap < self.lowest_overlap: self.lowest_overlap = num_overlap @@ -120,21 +122,25 @@ class CorrelationMatrix(object): corr_result_row.append([target_trait, 0, num_overlap]) pca_corr_result_row.append(0) else: - pearson_r, pearson_p = scipy.stats.pearsonr(this_trait_vals, target_vals) + pearson_r, pearson_p = scipy.stats.pearsonr( + this_trait_vals, target_vals) if is_spearman == False: sample_r, sample_p = pearson_r, pearson_p - if sample_r == 1: + if sample_r > 0.999: is_spearman = True else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + this_trait_vals, target_vals) - corr_result_row.append([target_trait, sample_r, num_overlap]) + corr_result_row.append( + [target_trait, sample_r, num_overlap]) pca_corr_result_row.append(pearson_r) self.corr_results.append(corr_result_row) self.pca_corr_results.append(pca_corr_result_row) - self.export_filename, self.export_filepath = export_corr_matrix(self.corr_results) + self.export_filename, self.export_filepath = export_corr_matrix( + self.corr_results) self.trait_data_array = [] for trait_db in self.trait_list: @@ -154,69 +160,76 @@ class CorrelationMatrix(object): for sample in self.all_sample_list: groups.append(1) - try: - corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) - corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen) - - if self.do_PCA == True: - self.pca_works = "True" - self.pca_trait_ids = [] - pca = self.calculate_pca(list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) - self.loadings_array = self.process_loadings() - else: - self.pca_works = "False" - except: - self.pca_works = "False" - - self.js_data = dict(traits = [trait.name for trait in self.traits], - groups = groups, - cols = list(range(len(self.traits))), - rows = list(range(len(self.traits))), - samples = self.all_sample_list, - sample_data = self.sample_data,) - - def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): - base = importr('base') - stats = importr('stats') - - corr_results_to_list = robjects.FloatVector([item for sublist in self.pca_corr_results for item in sublist]) - - m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) - eigen = base.eigen(m) - pca = stats.princomp(m, cor = "TRUE") - self.loadings = pca.rx('loadings') - self.scores = pca.rx('scores') - self.scale = pca.rx('scale') - - trait_array = zScore(self.trait_data_array) - trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) - - pca_traits = [] - for i, vector in enumerate(trait_array_vectors): - #ZS: Check if below check is necessary - #if corr_eigen_value[i-1] > 100.0/len(self.trait_list): - pca_traits.append((vector*-1.0).tolist()) - - this_group_name = self.trait_list[0][1].group.name - temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name) - temp_dataset.group.get_samplelist() - for i, pca_trait in enumerate(pca_traits): - trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") - this_vals_string = "" - position = 0 - for sample in temp_dataset.group.all_samples_ordered(): - if sample in self.shared_samples_list: - this_vals_string += str(pca_trait[position]) - this_vals_string += " " - position += 1 - else: - this_vals_string += "x " - this_vals_string = this_vals_string[:-1] - - Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) - self.pca_trait_ids.append(trait_id) - - return pca + # Not doing PCA until rpy2 is excised + self.pca_works = "False" + # try: + # corr_result_eigen = np.linalg.eig(np.array(self.pca_corr_results)) + # corr_eigen_value, corr_eigen_vectors = sortEigenVectors( + # corr_result_eigen) + + # if self.do_PCA == True: + # self.pca_works = "True" + # self.pca_trait_ids = [] + # pca = self.calculate_pca( + # list(range(len(self.traits))), corr_eigen_value, corr_eigen_vectors) + # self.loadings_array = self.process_loadings() + # else: + # self.pca_works = "False" + # except: + # self.pca_works = "False" + + self.js_data = dict(traits=[trait.name for trait in self.traits], + groups=groups, + cols=list(range(len(self.traits))), + rows=list(range(len(self.traits))), + samples=self.all_sample_list, + sample_data=self.sample_data,) + + # def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): + # base = importr('base') + # stats = importr('stats') + + # corr_results_to_list = robjects.FloatVector( + # [item for sublist in self.pca_corr_results for item in sublist]) + + # m = robjects.r.matrix(corr_results_to_list, nrow=len(cols)) + # eigen = base.eigen(m) + # pca = stats.princomp(m, cor="TRUE") + # self.loadings = pca.rx('loadings') + # self.scores = pca.rx('scores') + # self.scale = pca.rx('scale') + + # trait_array = zScore(self.trait_data_array) + # trait_array_vectors = np.dot(corr_eigen_vectors, trait_array) + + # pca_traits = [] + # for i, vector in enumerate(trait_array_vectors): + # # ZS: Check if below check is necessary + # # if corr_eigen_value[i-1] > 100.0/len(self.trait_list): + # pca_traits.append((vector * -1.0).tolist()) + + # this_group_name = self.trait_list[0][1].group.name + # temp_dataset = data_set.create_dataset( + # dataset_name="Temp", dataset_type="Temp", group_name=this_group_name) + # temp_dataset.group.get_samplelist() + # for i, pca_trait in enumerate(pca_traits): + # trait_id = "PCA" + str(i + 1) + "_" + temp_dataset.group.species + "_" + \ + # this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + # this_vals_string = "" + # position = 0 + # for sample in temp_dataset.group.all_samples_ordered(): + # if sample in self.shared_samples_list: + # this_vals_string += str(pca_trait[position]) + # this_vals_string += " " + # position += 1 + # else: + # this_vals_string += "x " + # this_vals_string = this_vals_string[:-1] + + # Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) + # self.pca_trait_ids.append(trait_id) + + # return pca def process_loadings(self): loadings_array = [] @@ -224,27 +237,34 @@ class CorrelationMatrix(object): for i in range(len(self.trait_list)): loadings_row = [] if len(self.trait_list) > 2: - the_range = 3 + the_range = 3 else: - the_range = 2 + the_range = 2 for j in range(the_range): - position = i + len(self.trait_list)*j + position = i + len(self.trait_list) * j loadings_row.append(self.loadings[0][position]) loadings_array.append(loadings_row) return loadings_array + def export_corr_matrix(corr_results): - corr_matrix_filename = "corr_matrix_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - matrix_export_path = "{}{}.csv".format(GENERATED_TEXT_DIR, corr_matrix_filename) + corr_matrix_filename = "corr_matrix_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + matrix_export_path = "{}{}.csv".format( + GENERATED_TEXT_DIR, corr_matrix_filename) with open(matrix_export_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") output_file.write("\n") output_file.write("Correlation ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i + 1) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i + 1) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i + 1) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[1]) + "\t") output_file.write("\n") @@ -253,16 +273,19 @@ def export_corr_matrix(corr_results): output_file.write("\n") output_file.write("N ") for i, item in enumerate(corr_results[0]): - output_file.write("Trait" + str(i) + ": " + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(item[0].dataset.name) + "::" + str(item[0].name) + "\t") output_file.write("\n") for i, row in enumerate(corr_results): - output_file.write("Trait" + str(i) + ": " + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") + output_file.write("Trait" + str(i) + ": " + \ + str(row[0][0].dataset.name) + "::" + str(row[0][0].name) + "\t") for item in row: output_file.write(str(item[2]) + "\t") output_file.write("\n") return corr_matrix_filename, matrix_export_path + def zScore(trait_data_array): NN = len(trait_data_array[0]) if NN < 10: @@ -271,18 +294,19 @@ def zScore(trait_data_array): i = 0 for data in trait_data_array: N = len(data) - S = reduce(lambda x, y: x+y, data, 0.) - SS = reduce(lambda x, y: x+y*y, data, 0.) - mean = S/N - var = SS - S*S/N - stdev = math.sqrt(var/(N-1)) + S = reduce(lambda x, y: x + y, data, 0.) + SS = reduce(lambda x, y: x + y * y, data, 0.) + mean = S / N + var = SS - S * S / N + stdev = math.sqrt(var / (N - 1)) if stdev == 0: stdev = 1e-100 - data2 = [(x-mean)/stdev for x in data] + data2 = [(x - mean) / stdev for x in data] trait_data_array[i] = data2 i += 1 return trait_data_array + def sortEigenVectors(vector): try: eigenValues = vector[0].tolist() @@ -298,8 +322,8 @@ def sortEigenVectors(vector): for item in combines: A.append(item[0]) B.append(item[1]) - sum = reduce(lambda x, y: x+y, A, 0.0) - A = [x*100.0/sum for x in A] + sum = reduce(lambda x, y: x + y, A, 0.0) + A = [x * 100.0 / sum for x in A] return [A, B] except: return [] diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 72b4f3a3..bb928ec5 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -25,33 +25,39 @@ from utility.tools import locate, GN2_BRANCH_URL from rpy2.robjects.packages import importr import utility.logger -logger = utility.logger.getLogger(__name__ ) - -## Get pointers to some common R functions -r_library = ro.r["library"] # Map the library function -r_options = ro.r["options"] # Map the options function -r_t = ro.r["t"] # Map the t function -r_unlist = ro.r["unlist"] # Map the unlist function -r_list = ro.r.list # Map the list function -r_png = ro.r["png"] # Map the png function for plotting -r_dev_off = ro.r["dev.off"] # Map the dev.off function -r_write_table = ro.r["write.table"] # Map the write.table function -r_data_frame = ro.r["data.frame"] # Map the write.table function -r_as_numeric = ro.r["as.numeric"] # Map the write.table function - -class CTL(object): +logger = utility.logger.getLogger(__name__) + +# Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_t = ro.r["t"] # Map the t function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function +r_write_table = ro.r["write.table"] # Map the write.table function +r_data_frame = ro.r["data.frame"] # Map the write.table function +r_as_numeric = ro.r["as.numeric"] # Map the write.table function + + +class CTL: def __init__(self): logger.info("Initialization of CTL") #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") - #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + # r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file #r_sink(log, type = "message") - r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive - r_options(stringsAsFactors = False) + # Load CTL - Should only be done once, since it is quite expensive + r_library("ctl") + r_options(stringsAsFactors=False) logger.info("Initialization of CTL done, package loaded in R session") - self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function - self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function - self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function - self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function + # Map the CTLscan function + self.r_CTLscan = ro.r["CTLscan"] + # Map the CTLsignificant function + self.r_CTLsignificant = ro.r["CTLsignificant"] + # Map the ctl.lineplot function + self.r_lineplot = ro.r["ctl.lineplot"] + # Map the CTLsignificant function + self.r_plotCTLobject = ro.r["plot.CTLobject"] self.nodes_list = [] self.edges_list = [] logger.info("Obtained pointers to CTL functions") @@ -59,28 +65,29 @@ class CTL(object): self.gn2_url = GN2_BRANCH_URL def addNode(self, gt): - node_dict = { 'data' : {'id' : str(gt.name) + ":" + str(gt.dataset.name), - 'sid' : str(gt.name), - 'dataset' : str(gt.dataset.name), - 'label' : gt.name, - 'symbol' : gt.symbol, - 'geneid' : gt.geneid, - 'omim' : gt.omim } } + node_dict = {'data': {'id': str(gt.name) + ":" + str(gt.dataset.name), + 'sid': str(gt.name), + 'dataset': str(gt.dataset.name), + 'label': gt.name, + 'symbol': gt.symbol, + 'geneid': gt.geneid, + 'omim': gt.omim}} self.nodes_list.append(node_dict) def addEdge(self, gtS, gtT, significant, x): - edge_data = {'id' : str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), - 'source' : str(gtS.name) + ":" + str(gtS.dataset.name), - 'target' : str(gtT.name) + ":" + str(gtT.dataset.name), - 'lod' : significant[3][x], - 'color' : "#ff0000", - 'width' : significant[3][x] } - edge_dict = { 'data' : edge_data } + edge_data = {'id': str(gtS.symbol) + '_' + significant[1][x] + '_' + str(gtT.symbol), + 'source': str(gtS.name) + ":" + str(gtS.dataset.name), + 'target': str(gtT.name) + ":" + str(gtT.dataset.name), + 'lod': significant[3][x], + 'color': "#ff0000", + 'width': significant[3][x]} + edge_dict = {'data': edge_data} self.edges_list.append(edge_dict) def run_analysis(self, requestform): logger.info("Starting CTL analysis on dataset") - self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [trait.strip() + for trait in requestform['trait_list'].split(',')] self.trait_db_list = [x for x in self.trait_db_list if x] logger.debug("strategy:", requestform.get("strategy")) @@ -108,45 +115,49 @@ class CTL(object): markers = [] markernames = [] for marker in parser.markers: - markernames.append(marker["name"]) - markers.append(marker["genotypes"]) + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) genotypes = list(itertools.chain(*markers)) - logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers)) + logger.debug(len(genotypes) / len(individuals), + "==", len(parser.markers)) - rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True)) + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len( + individuals), dimnames=r_list(markernames, individuals), byrow=True)) # Create a phenotype matrix traits = [] for trait in self.trait_db_list: - logger.debug("retrieving data for", trait) - if trait != "": - ts = trait.split(':') - gt = create_trait(name = ts[0], dataset_name = ts[1]) - gt = retrieve_sample_data(gt, dataset, individuals) - for ind in individuals: - if ind in list(gt.data.keys()): - traits.append(gt.data[ind].value) - else: - traits.append("-999") - - rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True)) + logger.debug("retrieving data for", trait) + if trait != "": + ts = trait.split(':') + gt = create_trait(name=ts[0], dataset_name=ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) + for ind in individuals: + if ind in list(gt.data.keys()): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len( + individuals), dimnames=r_list(self.trait_db_list, individuals), byrow=True)) logger.debug(rPheno) # Use a data frame to store the objects - rPheno = r_data_frame(rPheno, check_names = False) - rGeno = r_data_frame(rGeno, check_names = False) + rPheno = r_data_frame(rPheno, check_names=False) + rGeno = r_data_frame(rGeno, check_names=False) # Debug: Print the genotype and phenotype files to disk #r_write_table(rGeno, "~/outputGN/geno.csv") #r_write_table(rPheno, "~/outputGN/pheno.csv") # Perform the CTL scan - res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, nthreads=6) + res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, + nperm=nperm, parametric=parametric, nthreads=6) # Get significant interactions - significant = self.r_CTLsignificant(res, significance = significance) + significant = self.r_CTLsignificant(res, significance=significance) # Create an image for output self.results = {} @@ -154,40 +165,54 @@ class CTL(object): self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] self.results['ctlresult'] = significant - self.results['requestform'] = requestform # Store the user specified parameters for the output page + # Store the user specified parameters for the output page + self.results['requestform'] = requestform # Create the lineplot - r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') - self.r_lineplot(res, significance = significance) + r_png(self.results['imgloc1'], width=1000, + height=600, type='cairo-png') + self.r_lineplot(res, significance=significance) r_dev_off() - n = 2 # We start from 2, since R starts from 1 :) + # We start from 2, since R starts from 1 :) + n = 2 for trait in self.trait_db_list: - # Create the QTL like CTL plots - self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" - self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)] - r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') - self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait) - r_dev_off() - n = n + 1 + # Create the QTL like CTL plots + self.results['imgurl' + \ + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + \ + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], + width=1000, height=600, type='cairo-png') + self.r_plotCTLobject( + res, (n - 1), significance=significance, main='Phenotype ' + trait) + r_dev_off() + n = n + 1 # Flush any output from R sys.stdout.flush() # Create the interactive graph for cytoscape visualization (Nodes and Edges) if not isinstance(significant, ri.RNULLType): - for x in range(len(significant[0])): - logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console - tsS = significant[0][x].split(':') # Source - tsT = significant[2][x].split(':') # Target - gtS = create_trait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB - gtT = create_trait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB - self.addNode(gtS) - self.addNode(gtT) - self.addEdge(gtS, gtT, significant, x) - - significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) # Update the trait name for the displayed table - significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) # Update the trait name for the displayed table + for x in range(len(significant[0])): + logger.debug(significant[0][x], significant[1] + [x], significant[2][x]) # Debug to console + # Source + tsS = significant[0][x].split(':') + # Target + tsT = significant[2][x].split(':') + # Retrieve Source info from the DB + gtS = create_trait(name=tsS[0], dataset_name=tsS[1]) + # Retrieve Target info from the DB + gtT = create_trait(name=tsT[0], dataset_name=tsT[1]) + self.addNode(gtS) + self.addNode(gtT) + self.addEdge(gtS, gtT, significant, x) + + # Update the trait name for the displayed table + significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name) + # Update the trait name for the displayed table + significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name) self.elements = json.dumps(self.nodes_list + self.edges_list) @@ -202,8 +227,8 @@ class CTL(object): self.loadImage("imgloc1", "imgdata1") n = 2 for trait in self.trait_db_list: - self.loadImage("imgloc" + str(n), "imgdata" + str(n)) - n = n + 1 + self.loadImage("imgloc" + str(n), "imgdata" + str(n)) + n = n + 1 def process_results(self, results): logger.info("Processing CTL output") @@ -213,4 +238,3 @@ class CTL(object): self.render_image(self.results) sys.stdout.flush() return(dict(template_vars)) - diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py index adeed6ad..11f8d287 100644 --- a/wqflask/wqflask/database.py +++ b/wqflask/wqflask/database.py @@ -5,9 +5,6 @@ from sqlalchemy.orm import scoped_session, sessionmaker from sqlalchemy.ext.declarative import declarative_base from utility.tools import SQL_URI -import utility.logger -logger = utility.logger.getLogger(__name__ ) - engine = create_engine(SQL_URI, encoding="latin1") @@ -17,14 +14,5 @@ db_session = scoped_session(sessionmaker(autocommit=False, Base = declarative_base() Base.query = db_session.query_property() -def init_db(): - # import all modules here that might define models so that - # they will be registered properly on the metadata. Otherwise - # you will have to import them first before calling init_db() - #import yourapplication.models - logger.info("Initializing database connection") - import wqflask.model - Base.metadata.create_all(bind=engine) - logger.info("Done creating all model metadata") - -init_db() +# Initialise the db +Base.metadata.create_all(bind=engine) diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py index f420b472..938c453e 100644 --- a/wqflask/wqflask/db_info.py +++ b/wqflask/wqflask/db_info.py @@ -10,7 +10,7 @@ from utility.logger import getLogger logger = getLogger(__name__) -class InfoPage(object): +class InfoPage: def __init__(self, start_vars): self.info = None self.gn_accession_id = None @@ -23,23 +23,23 @@ class InfoPage(object): def get_info(self, create=False): query_base = ("SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, " + - "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + - "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + - "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + - "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + - "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + - "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + - "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + - "FROM InfoFiles " + - "LEFT JOIN Species USING (SpeciesId) " + - "LEFT JOIN Tissue USING (TissueId) " + - "LEFT JOIN InbredSet USING (InbredSetId) " + - "LEFT JOIN GeneChip USING (GeneChipId) " + - "LEFT JOIN AvgMethod USING (AvgMethodId) " + - "LEFT JOIN Datasets USING (DatasetId) " + - "LEFT JOIN Investigators USING (InvestigatorId) " + - "LEFT JOIN Organizations USING (OrganizationId) " + - "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") + "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " + + "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " + + "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " + + "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " + + "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " + + "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " + + "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " + + "FROM InfoFiles " + + "LEFT JOIN Species USING (SpeciesId) " + + "LEFT JOIN Tissue USING (TissueId) " + + "LEFT JOIN InbredSet USING (InbredSetId) " + + "LEFT JOIN GeneChip USING (GeneChipId) " + + "LEFT JOIN AvgMethod USING (AvgMethodId) " + + "LEFT JOIN Datasets USING (DatasetId) " + + "LEFT JOIN Investigators USING (InvestigatorId) " + + "LEFT JOIN Organizations USING (OrganizationId) " + + "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ") if self.gn_accession_id: final_query = query_base + \ @@ -90,6 +90,7 @@ class InfoPage(object): except Exception as e: pass + def process_query_results(results): info_ob = { 'info_page_name': results[0], @@ -134,5 +135,3 @@ def process_query_results(results): } return info_ob - - diff --git a/wqflask/wqflask/decorators.py b/wqflask/wqflask/decorators.py new file mode 100644 index 00000000..f0978fd3 --- /dev/null +++ b/wqflask/wqflask/decorators.py @@ -0,0 +1,14 @@ +"""This module contains gn2 decorators""" +from flask import g +from functools import wraps + + +def admin_login_required(f): + """Use this for endpoints where admins are required""" + @wraps(f) + def wrap(*args, **kwargs): + if g.user_session.record.get(b"user_email_address") not in [ + b"labwilliams@gmail.com"]: + return "You need to be admin", 401 + return f(*args, **kwargs) + return wrap diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 00636563..761ae326 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -17,7 +17,7 @@ from utility.logger import getLogger logger = getLogger(__name__) -class DoSearch(object): +class DoSearch: """Parent class containing parameters/functions used for all searches""" # Used to translate search phrases into classes @@ -26,14 +26,16 @@ class DoSearch(object): def __init__(self, search_term, search_operator=None, dataset=None, search_type=None): self.search_term = search_term # Make sure search_operator is something we expect - assert search_operator in (None, "=", "<", ">", "<=", ">="), "Bad search operator" + assert search_operator in ( + None, "=", "<", ">", "<=", ">="), "Bad search operator" self.search_operator = search_operator self.dataset = dataset self.search_type = search_type if self.dataset: - #Get group information for dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) + # Get group information for dataset and the species id + self.species_id = webqtlDatabaseFunction.retrieve_species_id( + self.dataset.group.name) def execute(self, query): """Executes query and returns results""" @@ -73,6 +75,7 @@ class DoSearch(object): else: return None + class MrnaAssaySearch(DoSearch): """A search within an expression dataset, including mRNA, protein, SNP, but not phenotype or metabolites""" @@ -103,12 +106,13 @@ class MrnaAssaySearch(DoSearch): search_string = escape(self.search_term[0]) if self.search_term[0] != "*": - match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % (search_string) + match_clause = """((MATCH (ProbeSet.symbol) AGAINST ('%s' IN BOOLEAN MODE))) and """ % ( + search_string) else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) @@ -130,30 +134,30 @@ class MrnaAssaySearch(DoSearch): else: match_clause = "" - where_clause = (match_clause + - """ProbeSet.Id = ProbeSetXRef.ProbeSetId + where_clause = (match_clause + + """ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s """ % (escape(str(self.dataset.id)))) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return query - def run_combined(self, from_clause = '', where_clause = ''): + def run_combined(self, from_clause='', where_clause=''): """Generates and runs a combined search of an mRNA expression dataset""" logger.debug("Running ProbeSetSearch") @@ -162,14 +166,14 @@ class MrnaAssaySearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %s ORDER BY ProbeSet.symbol ASC """ % (escape(from_clause), - where_clause, - escape(str(self.dataset.id)))) + where_clause, + escape(str(self.dataset.id)))) return self.execute(query) @@ -195,15 +199,15 @@ class PhenotypeSearch(DoSearch): FROM Phenotype, PublishFreeze, Publication, PublishXRef """ search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id') + 'Phenotype.Pre_publication_description', + 'Phenotype.Pre_publication_abbreviation', + 'Phenotype.Post_publication_abbreviation', + 'Phenotype.Lab_code', + 'Publication.PubMed_ID', + 'Publication.Abstract', + 'Publication.Title', + 'Publication.Authors', + 'PublishXRef.Id') header_fields = ['Index', 'Record', @@ -218,53 +222,56 @@ class PhenotypeSearch(DoSearch): def get_where_clause(self): """Generate clause for WHERE portion of query""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here - #if "'" not in self.search_term[0]: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" + # if "'" not in self.search_term[0]: + search_term = "[[:<:]]" + \ + self.handle_wildcard(self.search_term[0]) + "[[:>:]]" if "_" in self.search_term[0]: if len(self.search_term[0].split("_")[0]) == 3: - search_term = "[[:<:]]" + self.handle_wildcard(self.search_term[0].split("_")[1]) + "[[:>:]]" + search_term = "[[:<:]]" + self.handle_wildcard( + self.search_term[0].split("_")[1]) + "[[:>:]]" # This adds a clause to the query that matches the search term # against each field in the search_fields tuple where_clause_list = [] for field in self.search_fields: - where_clause_list.append('''%s REGEXP "%s"''' % (field, search_term)) + where_clause_list.append('''%s REGEXP "%s"''' % + (field, search_term)) where_clause = "(%s) " % ' OR '.join(where_clause_list) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) if self.search_term[0] == "*": query = (self.base_query + - """%s + """%s WHERE PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) else: query = (self.base_query + - """%s + """%s WHERE %s and PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s ORDER BY PublishXRef.Id""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return query @@ -276,26 +283,27 @@ class PhenotypeSearch(DoSearch): from_clause = self.normalize_spaces(from_clause) query = (self.base_query + - """%s + """%s WHERE %s PublishXRef.InbredSetId = %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %s""" % ( - from_clause, - where_clause, - escape(str(self.dataset.group.id)), - escape(str(self.dataset.id)))) + from_clause, + where_clause, + escape(str(self.dataset.group.id)), + escape(str(self.dataset.id)))) return self.execute(query) def run(self): """Generates and runs a simple search of a phenotype dataset""" - query = self.compile_final_query(where_clause = self.get_where_clause()) + query = self.compile_final_query(where_clause=self.get_where_clause()) return self.execute(query) + class GenotypeSearch(DoSearch): """A search within a genotype dataset""" @@ -328,45 +336,46 @@ class GenotypeSearch(DoSearch): for field in self.search_fields: where_clause.append('''%s REGEXP "%s"''' % ("%s.%s" % self.mescape(self.dataset.type, field), - self.search_term)) + self.search_term)) logger.debug("hello ;where_clause is:", pf(where_clause)) where_clause = "(%s) " % ' OR '.join(where_clause) return where_clause - def compile_final_query(self, from_clause = '', where_clause = ''): + def compile_final_query(self, from_clause='', where_clause=''): """Generates the final query string""" from_clause = self.normalize_spaces(from_clause) - if self.search_term[0] == "*": - query = (self.base_query + - """WHERE Geno.Id = GenoXRef.GenoId + query = (self.base_query + + """WHERE Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (escape(str(self.dataset.id)))) else: query = (self.base_query + - """WHERE %s + """WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id - and GenoFreeze.Id = %s"""% (where_clause, - escape(str(self.dataset.id)))) + and GenoFreeze.Id = %s""" % (where_clause, + escape(str(self.dataset.id)))) return query def run(self): """Generates and runs a simple search of a genotype dataset""" - #Todo: Zach will figure out exactly what both these lines mean - #and comment here + # Todo: Zach will figure out exactly what both these lines mean + # and comment here if self.search_term[0] == "*": self.query = self.compile_final_query() else: - self.query = self.compile_final_query(where_clause = self.get_where_clause()) + self.query = self.compile_final_query( + where_clause=self.get_where_clause()) return self.execute(self.query) + class RifSearch(MrnaAssaySearch): """Searches for traits with a Gene RIF entry including the search term.""" @@ -390,10 +399,11 @@ class RifSearch(MrnaAssaySearch): return self.execute(query) + class WikiSearch(MrnaAssaySearch): """Searches GeneWiki for traits other people have annotated""" - DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" + DoSearch.search_types['ProbeSet_WIKI'] = "WikiSearch" def get_from_clause(self): return ", GeneRIF " @@ -403,7 +413,7 @@ class WikiSearch(MrnaAssaySearch): and GeneRIF.versionId=0 and GeneRIF.display>0 and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s') """ % (self.dataset.type, - "[[:<:]]"+str(self.search_term[0])+"[[:>:]]", + "[[:<:]]" + str(self.search_term[0]) + "[[:>:]]", str(self.search_term[0])) return where_clause @@ -415,10 +425,11 @@ class WikiSearch(MrnaAssaySearch): return self.execute(query) + class GoSearch(MrnaAssaySearch): """Searches for synapse-associated genes listed in the Gene Ontology.""" - DoSearch.search_types['ProbeSet_GO'] = "GoSearch" + DoSearch.search_types['ProbeSet_GO'] = "GoSearch" def get_from_clause(self): from_clause = """, db_GeneOntology.term as GOterm, @@ -429,7 +440,7 @@ class GoSearch(MrnaAssaySearch): def get_where_clause(self): field = 'GOterm.acc' - go_id = 'GO:' + ('0000000'+self.search_term[0])[-7:] + go_id = 'GO:' + ('0000000' + self.search_term[0])[-7:] statements = ("""%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and @@ -448,7 +459,9 @@ class GoSearch(MrnaAssaySearch): return self.execute(query) -#ZS: Not sure what the best way to deal with LRS searches is +# ZS: Not sure what the best way to deal with LRS searches is + + class LrsSearch(DoSearch): """Searches for genes with a QTL within the given LRS values @@ -486,17 +499,18 @@ class LrsSearch(DoSearch): assert isinstance(self.search_term, (list, tuple)) lrs_min, lrs_max = self.search_term[:2] if self.search_type == "LOD": - lrs_min = lrs_min*4.61 - lrs_max = lrs_max*4.61 + lrs_min = lrs_min * 4.61 + lrs_max = lrs_max * 4.61 where_clause = """ %sXRef.LRS > %s and %sXRef.LRS < %s """ % self.mescape(self.dataset.type, - min(lrs_min, lrs_max), + min(lrs_min, + lrs_max), self.dataset.type, max(lrs_min, lrs_max)) if len(self.search_term) > 2: - #If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats + # If the user typed, for example "Chr4", the "Chr" substring needs to be removed so that all search elements can be converted to floats chr_num = self.search_term[2] if "chr" in self.search_term[2].lower(): chr_num = self.search_term[2].lower().replace("chr", "") @@ -512,27 +526,27 @@ class LrsSearch(DoSearch): where_clause += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s """ % self.mescape(self.dataset.type, - self.species_id) + self.species_id) else: # Deal with >, <, >=, and <= logger.debug("self.search_term is:", self.search_term) lrs_val = self.search_term[0] if self.search_type == "LOD": - lrs_val = lrs_val*4.61 + lrs_val = lrs_val * 4.61 where_clause = """ %sXRef.LRS %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause - def run(self): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -546,10 +560,12 @@ class MrnaLrsSearch(LrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) + class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): for search_key in ('LRS', 'LOD'): @@ -560,7 +576,8 @@ class PhenotypeLrsSearch(LrsSearch, PhenotypeSearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(from_clause = self.from_clause, where_clause = self.where_clause) + self.query = self.compile_final_query( + from_clause=self.from_clause, where_clause=self.where_clause) return self.execute(self.query) @@ -587,7 +604,8 @@ class CisTransLrsSearch(DoSearch): elif len(self.search_term) == 3: lrs_min, lrs_max, self.mb_buffer = self.search_term elif len(self.search_term) == 4: - lrs_min, lrs_max, self.mb_buffer = [float(value) for value in self.search_term[:3]] + lrs_min, lrs_max, self.mb_buffer = [ + float(value) for value in self.search_term[:3]] chromosome = self.search_term[3] if "Chr" in chromosome or "chr" in chromosome: chromosome = int(chromosome[3:]) @@ -599,19 +617,19 @@ class CisTransLrsSearch(DoSearch): lrs_max = lrs_max * 4.61 sub_clause = """ %sXRef.LRS > %s and - %sXRef.LRS < %s and """ % ( - escape(self.dataset.type), - escape(str(min(lrs_min, lrs_max))), - escape(self.dataset.type), - escape(str(max(lrs_min, lrs_max))) - ) + %sXRef.LRS < %s and """ % ( + escape(self.dataset.type), + escape(str(min(lrs_min, lrs_max))), + escape(self.dataset.type), + escape(str(max(lrs_min, lrs_max))) + ) else: # Deal with >, <, >=, and <= - sub_clause = """ %sXRef.LRS %s %s and """ % ( - escape(self.dataset.type), - escape(self.search_operator), - escape(self.search_term[0]) - ) + sub_clause = """ %sXRef.LRS %s %s and """ % ( + escape(self.dataset.type), + escape(self.search_operator), + escape(self.search_term[0]) + ) if cis_trans == "cis": where_clause = sub_clause + """ @@ -619,36 +637,42 @@ class CisTransLrsSearch(DoSearch): %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr""" % ( - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - escape(str(self.species_id)), - escape(self.dataset.type) - ) + escape(self.dataset.type), + the_operator, + escape(str(self.mb_buffer)), + escape(self.dataset.type), + escape(str(self.species_id)), + escape(self.dataset.type) + ) else: if chromosome: location_clause = "(%s.Chr = '%s' and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) %s %s) or (%s.Chr != Geno.Chr and Geno.Chr = '%s')" % (escape(self.dataset.type), - chromosome, - escape(self.dataset.type), - escape(self.dataset.type), - the_operator, - escape(str(self.mb_buffer)), - escape(self.dataset.type), - chromosome) + chromosome, + escape( + self.dataset.type), + escape( + self.dataset.type), + the_operator, + escape( + str(self.mb_buffer)), + escape( + self.dataset.type), + chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape( + self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s)""" % ( - escape(self.dataset.type), - escape(str(self.species_id)), - location_clause - ) + escape(self.dataset.type), + escape(str(self.species_id)), + location_clause + ) return where_clause + class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -667,7 +691,7 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_CIS'+search_key] = "CisLrsSearch" + DoSearch.search_types['ProbeSet_CIS' + search_key] = "CisLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "cis") @@ -676,10 +700,12 @@ class CisLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) + class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """Searches for genes on a particular chromosome with a cis-eQTL within the given LRS values @@ -697,7 +723,7 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): """ for search_key in ('LRS', 'LOD'): - DoSearch.search_types['ProbeSet_TRANS'+search_key] = "TransLrsSearch" + DoSearch.search_types['ProbeSet_TRANS' + search_key] = "TransLrsSearch" def get_where_clause(self): return CisTransLrsSearch.get_where_clause(self, "trans") @@ -706,7 +732,8 @@ class TransLrsSearch(CisTransLrsSearch, MrnaAssaySearch): self.from_clause = self.get_from_clause() self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(self.from_clause, self.where_clause) + self.query = self.compile_final_query( + self.from_clause, self.where_clause) return self.execute(self.query) @@ -725,14 +752,15 @@ class MeanSearch(MrnaAssaySearch): where_clause = """ %sXRef.mean > %s and %sXRef.mean < %s """ % self.mescape(self.dataset.type, - min(self.mean_min, self.mean_max), - self.dataset.type, - max(self.mean_min, self.mean_max)) + min(self.mean_min, + self.mean_max), + self.dataset.type, + max(self.mean_min, self.mean_max)) else: # Deal with >, <, >=, and <= where_clause = """ %sXRef.mean %s %s """ % self.mescape(self.dataset.type, - self.search_operator, - self.search_term[0]) + self.search_operator, + self.search_term[0]) return where_clause @@ -740,10 +768,11 @@ class MeanSearch(MrnaAssaySearch): self.where_clause = self.get_where_clause() logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class RangeSearch(MrnaAssaySearch): """Searches for genes with a range of expression varying between two values""" @@ -775,10 +804,11 @@ class RangeSearch(MrnaAssaySearch): def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PositionSearch(DoSearch): """Searches for genes/markers located within a specified range on a specified chromosome""" @@ -786,7 +816,8 @@ class PositionSearch(DoSearch): DoSearch.search_types[search_key] = "PositionSearch" def get_where_clause(self): - self.search_term = [float(value) if is_number(value) else value for value in self.search_term] + self.search_term = [float(value) if is_number( + value) else value for value in self.search_term] chr, self.mb_min, self.mb_max = self.search_term[:3] self.chr = str(chr).lower() self.get_chr() @@ -796,11 +827,11 @@ class PositionSearch(DoSearch): %s.Mb < %s """ % self.mescape(self.dataset.type, self.chr, self.dataset.type, - min(self.mb_min, self.mb_max), + min(self.mb_min, + self.mb_max), self.dataset.type, max(self.mb_min, self.mb_max)) - return where_clause def get_chr(self): @@ -815,36 +846,39 @@ class PositionSearch(DoSearch): def run(self): self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class MrnaPositionSearch(PositionSearch, MrnaAssaySearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['ProbeSet_'+search_key] = "MrnaPositionSearch" + DoSearch.search_types['ProbeSet_' + search_key] = "MrnaPositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class GenotypePositionSearch(PositionSearch, GenotypeSearch): """Searches for genes located within a specified range on a specified chromosome""" for search_key in ('POSITION', 'POS', 'MB'): - DoSearch.search_types['Geno_'+search_key] = "GenotypePositionSearch" + DoSearch.search_types['Geno_' + search_key] = "GenotypePositionSearch" def run(self): self.where_clause = self.get_where_clause() - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) + class PvalueSearch(MrnaAssaySearch): """Searches for traits with a permutationed p-value between low and high""" @@ -859,25 +893,26 @@ class PvalueSearch(MrnaAssaySearch): self.pvalue_min, self.pvalue_max = self.search_term[:2] self.where_clause = """ %sXRef.pValue > %s and %sXRef.pValue < %s """ % self.mescape( - self.dataset.type, - min(self.pvalue_min, self.pvalue_max), - self.dataset.type, - max(self.pvalue_min, self.pvalue_max)) + self.dataset.type, + min(self.pvalue_min, self.pvalue_max), + self.dataset.type, + max(self.pvalue_min, self.pvalue_max)) else: # Deal with >, <, >=, and <= self.where_clause = """ %sXRef.pValue %s %s """ % self.mescape( - self.dataset.type, - self.search_operator, - self.search_term[0]) + self.dataset.type, + self.search_operator, + self.search_term[0]) logger.debug("where_clause is:", pf(self.where_clause)) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) logger.sql(self.query) return self.execute(self.query) + class AuthorSearch(PhenotypeSearch): """Searches for phenotype traits with specified author(s)""" @@ -888,7 +923,7 @@ class AuthorSearch(PhenotypeSearch): self.where_clause = """ Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" and """ % (self.search_term[0]) - self.query = self.compile_final_query(where_clause = self.where_clause) + self.query = self.compile_final_query(where_clause=self.where_clause) return self.execute(self.query) @@ -900,6 +935,7 @@ def is_number(s): except ValueError: return False + def get_aliases(symbol, species): if species == "mouse": symbol_string = symbol.capitalize() @@ -909,7 +945,8 @@ def get_aliases(symbol, species): return [] filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases/" + symbol_string) if response: alias_list = json.loads(response.content) @@ -923,9 +960,10 @@ def get_aliases(symbol, species): return filtered_aliases + if __name__ == "__main__": - ### Usually this will be used as a library, but call it from the command line for testing - ### And it runs the code below + # Usually this will be used as a library, but call it from the command line for testing + # And it runs the code below import MySQLdb import sys diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py index 23fc3cad..0a1a597d 100644 --- a/wqflask/wqflask/docs.py +++ b/wqflask/wqflask/docs.py @@ -5,7 +5,8 @@ from flask import g from utility.logger import getLogger logger = getLogger(__name__) -class Docs(object): + +class Docs: def __init__(self, entry, start_vars={}): sql = """ @@ -19,11 +20,10 @@ class Docs(object): self.title = self.entry.capitalize() self.content = "" else: - + self.title = result[0] self.content = result[1].decode("utf-8") - self.editable = "false" # ZS: Removing option to edit to see if text still gets vandalized try: @@ -35,11 +35,13 @@ class Docs(object): def update_text(start_vars): content = start_vars['ckcontent'] - content = content.replace('%', '%%').replace('"', '\\"').replace("'", "\\'") + content = content.replace('%', '%%').replace( + '"', '\\"').replace("'", "\\'") try: if g.user_session.record['user_email_address'] == "zachary.a.sloan@gmail.com" or g.user_session.record['user_email_address'] == "labwilliams@gmail.com": - sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format(content, start_vars['entry_type']) + sql = "UPDATE Docs SET content='{0}' WHERE entry='{1}';".format( + content, start_vars['entry_type']) g.db.execute(sql) except: pass diff --git a/wqflask/wqflask/export_traits.py b/wqflask/wqflask/export_traits.py index 6fb760e0..5459dc31 100644 --- a/wqflask/wqflask/export_traits.py +++ b/wqflask/wqflask/export_traits.py @@ -1,6 +1,6 @@ import csv import xlsxwriter -import io +import io import datetime import itertools @@ -13,13 +13,14 @@ from base.trait import create_trait, retrieve_trait_info from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def export_search_results_csv(targs): table_data = json.loads(targs['export_data']) table_rows = table_data['rows'] - + now = datetime.datetime.now() time_str = now.strftime('%H:%M_%d%B%Y') if 'file_name' in targs: @@ -34,9 +35,12 @@ def export_search_results_csv(targs): metadata.append(["Data Set: " + targs['database_name']]) if 'accession_id' in targs: if targs['accession_id'] != "None": - metadata.append(["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) - metadata.append(["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) - metadata.append(["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) + metadata.append( + ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']]) + metadata.append( + ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")]) + metadata.append( + ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")]) if 'search_string' in targs: if targs['search_string'] != "None": metadata.append(["Search Query: " + targs['search_string']]) @@ -51,22 +55,28 @@ def export_search_results_csv(targs): for trait in table_rows: trait_name, dataset_name, _hash = trait.split(":") trait_ob = create_trait(name=trait_name, dataset_name=dataset_name) - trait_ob = retrieve_trait_info(trait_ob, trait_ob.dataset, get_qtl_info=True) + trait_ob = retrieve_trait_info( + trait_ob, trait_ob.dataset, get_qtl_info=True) trait_list.append(trait_ob) - table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] + table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID', + 'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression'] traits_by_group = sort_traits_by_group(trait_list) file_list = [] - for group in list(traits_by_group.keys()): + for group in traits_by_group: group_traits = traits_by_group[group] + samplelist = group_traits[0].dataset.group.samplelist + if not samplelist: + continue + buff = io.StringIO() writer = csv.writer(buff) csv_rows = [] sample_headers = [] - for sample in group_traits[0].dataset.group.samplelist: + for sample in samplelist: sample_headers.append(sample) sample_headers.append(sample + "_SE") @@ -86,7 +96,8 @@ def export_search_results_csv(targs): trait_symbol = "N/A" row_contents = [ i + 1, - "https://genenetwork.org/show_trait?trait_id=" + str(trait.name) + "&dataset=" + str(trait.dataset.name), + "https://genenetwork.org/show_trait?trait_id=" + \ + str(trait.name) + "&dataset=" + str(trait.dataset.name), trait.dataset.group.species, trait.dataset.group.name, trait.dataset.name, @@ -116,13 +127,15 @@ def export_search_results_csv(targs): for sample in trait.dataset.group.samplelist: if sample in trait.data: - row_contents += [trait.data[sample].value, trait.data[sample].variance] + row_contents += [trait.data[sample].value, + trait.data[sample].variance] else: row_contents += ["x", "x"] csv_rows.append(row_contents) - csv_rows = list(map(list, itertools.zip_longest(*[row for row in csv_rows]))) + csv_rows = list( + map(list, itertools.zip_longest(*[row for row in csv_rows]))) writer.writerows(csv_rows) csv_data = buff.getvalue() buff.close() @@ -132,6 +145,7 @@ def export_search_results_csv(targs): return file_list + def sort_traits_by_group(trait_list=[]): traits_by_group = {} for trait in trait_list: diff --git a/wqflask/wqflask/external_tools/send_to_bnw.py b/wqflask/wqflask/external_tools/send_to_bnw.py index efa17f05..c1b14ede 100644 --- a/wqflask/wqflask/external_tools/send_to_bnw.py +++ b/wqflask/wqflask/external_tools/send_to_bnw.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -22,11 +22,13 @@ from base.trait import GeneralTrait from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class SendToBNW(object): + +class SendToBNW: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) trait_samples_list = [] @@ -38,9 +40,10 @@ class SendToBNW(object): trait1_samples = list(this_sample_data.keys()) trait_samples_list.append(trait1_samples) - shared_samples = list(set(trait_samples_list[0]).intersection(*trait_samples_list)) + shared_samples = list( + set(trait_samples_list[0]).intersection(*trait_samples_list)) - self.form_value = "" #ZS: string that is passed to BNW through form + self.form_value = "" # ZS: string that is passed to BNW through form values_list = [] for trait_db in self.trait_list: this_trait = trait_db[0] diff --git a/wqflask/wqflask/external_tools/send_to_geneweaver.py b/wqflask/wqflask/external_tools/send_to_geneweaver.py index 4c958a88..9a4f7150 100644 --- a/wqflask/wqflask/external_tools/send_to_geneweaver.py +++ b/wqflask/wqflask/external_tools/send_to_geneweaver.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class SendToGeneWeaver(object): + +class SendToGeneWeaver: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -52,11 +54,12 @@ class SendToGeneWeaver(object): trait_name_list = get_trait_name_list(self.trait_list) self.hidden_vars = { - 'client': "genenetwork", - 'species': species_name, - 'idtype': self.chip_name, - 'list': ",".join(trait_name_list), - } + 'client': "genenetwork", + 'species': species_name, + 'idtype': self.chip_name, + 'list': ",".join(trait_name_list), + } + def get_trait_name_list(trait_list): name_list = [] @@ -65,6 +68,7 @@ def get_trait_name_list(trait_list): return name_list + def test_chip(trait_list): final_chip_name = "" @@ -74,7 +78,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] diff --git a/wqflask/wqflask/external_tools/send_to_webgestalt.py b/wqflask/wqflask/external_tools/send_to_webgestalt.py index 2f068792..6e74f4fe 100644 --- a/wqflask/wqflask/external_tools/send_to_webgestalt.py +++ b/wqflask/wqflask/external_tools/send_to_webgestalt.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -27,11 +27,13 @@ from base.species import TheSpecies from utility import helper_functions, corr_result_helpers import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class SendToWebGestalt(object): + +class SendToWebGestalt: def __init__(self, start_vars): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.chip_name = test_chip(self.trait_list) @@ -46,18 +48,18 @@ class SendToWebGestalt(object): id_type = "entrezgene" - self.hidden_vars = { - 'gene_list' : "\n".join(gene_id_list), - 'id_type' : "entrezgene", - 'ref_set' : "genome", - 'enriched_database_category' : "geneontology", - 'enriched_database_name' : "Biological_Process", - 'sig_method' : "fdr", - 'sig_value' : "0.05", - 'enrich_method' : "ORA", - 'fdr_method' : "BH", - 'min_num' : "2" - } + self.hidden_vars = { + 'gene_list': "\n".join(gene_id_list), + 'id_type': "entrezgene", + 'ref_set': "genome", + 'enriched_database_category': "geneontology", + 'enriched_database_name': "Biological_Process", + 'sig_method': "fdr", + 'sig_value': "0.05", + 'enrich_method': "ORA", + 'fdr_method': "BH", + 'min_num': "2" + } species = self.trait_list[0][1].group.species if species == "rat": @@ -69,6 +71,7 @@ class SendToWebGestalt(object): else: self.hidden_vars['organism'] = "others" + def test_chip(trait_list): final_chip_name = "" @@ -78,7 +81,7 @@ def test_chip(trait_list): FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() + ProbeSetFreeze.Name = '%s'""" % dataset.name).fetchone() if result: chip_name = result[0] @@ -113,6 +116,7 @@ def test_chip(trait_list): return chip_name + def gen_gene_id_list(trait_list): trait_name_list = [] gene_id_list = [] diff --git a/wqflask/wqflask/group_manager.py b/wqflask/wqflask/group_manager.py index 69ee9623..04a100ba 100644 --- a/wqflask/wqflask/group_manager.py +++ b/wqflask/wqflask/group_manager.py @@ -1,4 +1,5 @@ -import random, string +import random +import string from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash) @@ -7,149 +8,168 @@ from wqflask import app from wqflask.user_login import send_verification_email, send_invitation_email, basic_info, set_password from utility.redis_tools import get_user_groups, get_group_info, save_user, create_group, delete_group, add_users_to_group, remove_users_from_group, \ - change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info + change_group_name, save_verification_code, check_verification_code, get_user_by_unique_column, get_resources, get_resource_info from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/groups/manage", methods=('GET', 'POST')) def manage_groups(): - params = request.form if request.form else request.args - if "add_new_group" in params: - return redirect(url_for('add_group')) - else: - admin_groups, member_groups = get_user_groups(g.user_session.user_id) - return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + params = request.form if request.form else request.args + if "add_new_group" in params: + return redirect(url_for('add_group')) + else: + admin_groups, member_groups = get_user_groups(g.user_session.user_id) + return render_template("admin/group_manager.html", admin_groups=admin_groups, member_groups=member_groups) + @app.route("/groups/view", methods=('GET', 'POST')) def view_group(): - params = request.form if request.form else request.args - group_id = params['id'] - group_info = get_group_info(group_id) - admins_info = [] - user_is_admin = False - if g.user_session.user_id in group_info['admins']: - user_is_admin = True - for user_id in group_info['admins']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - admins_info.append(user_info) - members_info = [] - for user_id in group_info['members']: - if user_id: - user_info = get_user_by_unique_column("user_id", user_id) - members_info.append(user_info) - - #ZS: This whole part might not scale well with many resources - resources_info = [] - all_resources = get_resources() - for resource_id in all_resources: - resource_info = get_resource_info(resource_id) - group_masks = resource_info['group_masks'] - if group_id in group_masks: - this_resource = {} - privileges = group_masks[group_id] - this_resource['id'] = resource_id - this_resource['name'] = resource_info['name'] - this_resource['data'] = privileges['data'] - this_resource['metadata'] = privileges['metadata'] - this_resource['admin'] = privileges['admin'] - resources_info.append(this_resource) - - return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + params = request.form if request.form else request.args + group_id = params['id'] + group_info = get_group_info(group_id) + admins_info = [] + user_is_admin = False + if g.user_session.user_id in group_info['admins']: + user_is_admin = True + for user_id in group_info['admins']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + admins_info.append(user_info) + members_info = [] + for user_id in group_info['members']: + if user_id: + user_info = get_user_by_unique_column("user_id", user_id) + members_info.append(user_info) + + # ZS: This whole part might not scale well with many resources + resources_info = [] + all_resources = get_resources() + for resource_id in all_resources: + resource_info = get_resource_info(resource_id) + group_masks = resource_info['group_masks'] + if group_id in group_masks: + this_resource = {} + privileges = group_masks[group_id] + this_resource['id'] = resource_id + this_resource['name'] = resource_info['name'] + this_resource['data'] = privileges['data'] + this_resource['metadata'] = privileges['metadata'] + this_resource['admin'] = privileges['admin'] + resources_info.append(this_resource) + + return render_template("admin/view_group.html", group_info=group_info, admins=admins_info, members=members_info, user_is_admin=user_is_admin, resources=resources_info) + @app.route("/groups/remove", methods=('POST',)) def remove_groups(): - group_ids_to_remove = request.form['selected_group_ids'] - for group_id in group_ids_to_remove.split(":"): - delete_group(g.user_session.user_id, group_id) + group_ids_to_remove = request.form['selected_group_ids'] + for group_id in group_ids_to_remove.split(":"): + delete_group(g.user_session.user_id, group_id) + + return redirect(url_for('manage_groups')) - return redirect(url_for('manage_groups')) @app.route("/groups/remove_users", methods=('POST',)) def remove_users(): - group_id = request.form['group_id'] - admin_ids_to_remove = request.form['selected_admin_ids'] - member_ids_to_remove = request.form['selected_member_ids'] + group_id = request.form['group_id'] + admin_ids_to_remove = request.form['selected_admin_ids'] + member_ids_to_remove = request.form['selected_member_ids'] - remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split(":"), group_id, user_type="admins") - remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split(":"), group_id, user_type="members") + remove_users_from_group(g.user_session.user_id, admin_ids_to_remove.split( + ":"), group_id, user_type="admins") + remove_users_from_group(g.user_session.user_id, member_ids_to_remove.split( + ":"), group_id, user_type="members") + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/add_<path:user_type>", methods=('POST',)) def add_users(user_type='members'): - group_id = request.form['group_id'] - if user_type == "admins": - user_emails = request.form['admin_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = True) - elif user_type == "members": - user_emails = request.form['member_emails_to_add'].split(",") - add_users_to_group(g.user_session.user_id, group_id, user_emails, admins = False) + group_id = request.form['group_id'] + if user_type == "admins": + user_emails = request.form['admin_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=True) + elif user_type == "members": + user_emails = request.form['member_emails_to_add'].split(",") + add_users_to_group(g.user_session.user_id, group_id, + user_emails, admins=False) + + return redirect(url_for('view_group', id=group_id)) - return redirect(url_for('view_group', id=group_id)) @app.route("/groups/change_name", methods=('POST',)) def change_name(): - group_id = request.form['group_id'] - new_name = request.form['new_name'] - group_info = change_group_name(g.user_session.user_id, group_id, new_name) + group_id = request.form['group_id'] + new_name = request.form['new_name'] + group_info = change_group_name(g.user_session.user_id, group_id, new_name) + + return new_name - return new_name @app.route("/groups/create", methods=('GET', 'POST')) def add_or_edit_group(): - params = request.form if request.form else request.args - if "group_name" in params: - member_user_ids = set() - admin_user_ids = set() - admin_user_ids.add(g.user_session.user_id) #ZS: Always add the user creating the group as an admin - if "admin_emails_to_add" in params: - admin_emails = params['admin_emails_to_add'].split(",") - for email in admin_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - admin_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") - if "member_emails_to_add" in params: - member_emails = params['member_emails_to_add'].split(",") - for email in member_emails: - user_details = get_user_by_unique_column("email_address", email) - if user_details: - member_user_ids.add(user_details['user_id']) - #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") - - create_group(list(admin_user_ids), list(member_user_ids), params['group_name']) - return redirect(url_for('manage_groups')) - else: - return render_template("admin/create_group.html") - -#ZS: Will integrate this later, for now just letting users be added directly -def send_group_invites(group_id, user_email_list = [], user_type="members"): - for user_email in user_email_list: - user_details = get_user_by_unique_column("email_address", user_email) - if user_details: - group_info = get_group_info(group_id) - #ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, - # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something - if group_info: - #ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, - # but do add them if they're a regular user and are added as an admin - if (user_details['user_id'] in group_info['admins']) or \ - ((user_type == "members") and (user_details['user_id'] in group_info['members'])): - continue - else: - send_verification_email(user_details, template_name = "email/group_verification.txt", key_prefix = "verification_code", subject = "You've been invited to join a GeneNetwork user group") - else: - temp_password = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - user_details = { - 'user_id': str(uuid.uuid4()), - 'email_address': user_email, - 'registration_info': basic_info(), - 'password': set_password(temp_password), - 'confirmed': 0 - } - save_user(user_details, user_details['user_id']) - send_invitation_email(user_email, temp_password) - -#@app.route() + params = request.form if request.form else request.args + if "group_name" in params: + member_user_ids = set() + admin_user_ids = set() + # ZS: Always add the user creating the group as an admin + admin_user_ids.add(g.user_session.user_id) + if "admin_emails_to_add" in params: + admin_emails = params['admin_emails_to_add'].split(",") + for email in admin_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + admin_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = admin_emails, user_type="admins") + if "member_emails_to_add" in params: + member_emails = params['member_emails_to_add'].split(",") + for email in member_emails: + user_details = get_user_by_unique_column( + "email_address", email) + if user_details: + member_user_ids.add(user_details['user_id']) + #send_group_invites(params['group_id'], user_email_list = user_emails, user_type="members") + + create_group(list(admin_user_ids), list( + member_user_ids), params['group_name']) + return redirect(url_for('manage_groups')) + else: + return render_template("admin/create_group.html") + +# ZS: Will integrate this later, for now just letting users be added directly + + +def send_group_invites(group_id, user_email_list=[], user_type="members"): + for user_email in user_email_list: + user_details = get_user_by_unique_column("email_address", user_email) + if user_details: + group_info = get_group_info(group_id) + # ZS: Probably not necessary since the group should normally always exist if group_id is being passed here, + # but it's technically possible to hit it if Redis is cleared out before submitting the new users or something + if group_info: + # ZS: Don't add user if they're already an admin or if they're being added a regular user and are already a regular user, + # but do add them if they're a regular user and are added as an admin + if (user_details['user_id'] in group_info['admins']) or \ + ((user_type == "members") and (user_details['user_id'] in group_info['members'])): + continue + else: + send_verification_email(user_details, template_name="email/group_verification.txt", + key_prefix="verification_code", subject="You've been invited to join a GeneNetwork user group") + else: + temp_password = ''.join(random.choice( + string.ascii_uppercase + string.digits) for _ in range(6)) + user_details = { + 'user_id': str(uuid.uuid4()), + 'email_address': user_email, + 'registration_info': basic_info(), + 'password': set_password(temp_password), + 'confirmed': 0 + } + save_user(user_details, user_details['user_id']) + send_invitation_email(user_email, temp_password) + +# @app.route() diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 907f1180..2516e4fb 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -18,7 +18,8 @@ from utility.type_checking import is_float, is_int, is_str, get_float, get_int, from utility.logger import getLogger logger = getLogger(__name__) -class GSearch(object): + +class GSearch: def __init__(self, kw): assert('type' in kw) @@ -76,33 +77,48 @@ class GSearch(object): this_trait['name'] = line[5] this_trait['dataset'] = line[3] this_trait['dataset_fullname'] = line[4] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[5], line[3])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[5], line[3])) this_trait['species'] = line[0] this_trait['group'] = line[1] this_trait['tissue'] = line[2] this_trait['symbol'] = line[6] if line[7]: - this_trait['description'] = line[7].decode('utf-8', 'replace') + this_trait['description'] = line[7].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" this_trait['location_repr'] = 'N/A' if (line[8] != "NULL" and line[8] != "") and (line[9] != 0): - this_trait['location_repr'] = 'Chr%s: %.6f' % (line[8], float(line[9])) - try: - this_trait['mean'] = '%.3f' % line[10] - except: - this_trait['mean'] = "N/A" + this_trait['location_repr'] = 'Chr%s: %.6f' % ( + line[8], float(line[9])) + this_trait['LRS_score_repr'] = "N/A" + this_trait['additive'] = "N/A" + this_trait['mean'] = "N/A" + if line[11] != "" and line[11] != None: - this_trait['LRS_score_repr'] = '%3.1f' % line[11] + this_trait['LRS_score_repr'] = f"{line[11]:.3f}" + if line[14] != "" and line[14] != None: + this_trait['additive'] = f"{line[14]:.3f}" + if line[10] != "" and line[10] != None: + this_trait['mean'] = f"{line[10]:.3f}" + + locus_chr = line[16] + locus_mb = line[17] + + max_lrs_text = "N/A" + if locus_chr and locus_mb: + max_lrs_text = f"Chr{locus_chr}: {locus_mb}" + this_trait['max_lrs_text'] = max_lrs_text + this_trait['additive'] = "N/A" if line[14] != "" and line[14] != None: this_trait['additive'] = '%.3f' % line[14] this_trait['dataset_id'] = line[15] - this_trait['locus_chr'] = line[16] - this_trait['locus_mb'] = line[17] - dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="ProbeSet",species=this_trait["species"]) + dataset_ob = SimpleNamespace( + id=this_trait["dataset_id"], type="ProbeSet", species=this_trait["species"]) if dataset_ob.id not in dataset_to_permissions: permissions = check_resource_availability(dataset_ob) dataset_to_permissions[dataset_ob.id] = permissions @@ -115,29 +131,40 @@ class GSearch(object): if permissions['data'] == 'no-access': continue - max_lrs_text = "N/A" - if this_trait['locus_chr'] != None and this_trait['locus_mb'] != None: - max_lrs_text = "Chr" + str(this_trait['locus_chr']) + ": " + str(this_trait['locus_mb']) - this_trait['max_lrs_text'] = max_lrs_text - trait_list.append(this_trait) self.trait_count = len(trait_list) - self.trait_list = json.dumps(trait_list) + self.trait_list = trait_list self.header_fields = ['Index', - 'Record', - 'Species', - 'Group', - 'Tissue', - 'Dataset', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Species', + 'Group', + 'Tissue', + 'Dataset', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] + + self.header_data_names = [ + 'index', + 'name', + 'species', + 'group', + 'tissue', + 'dataset_fullname', + 'symbol', + 'description', + 'location_repr', + 'mean', + 'LRS_score_repr', + 'max_lrs_text', + 'additive', + ] elif self.type == "phenotype": search_term = self.terms @@ -145,7 +172,8 @@ class GSearch(object): if "_" in self.terms: if len(self.terms.split("_")[0]) == 3: search_term = self.terms.split("_")[1] - group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(self.terms.split("_")[0]) + group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format( + self.terms.split("_")[0]) sql = """ SELECT Species.`Name`, @@ -161,24 +189,23 @@ class GSearch(object): PublishXRef.`LRS`, PublishXRef.`additive`, InbredSet.`InbredSetCode`, - PublishXRef.`mean` - FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication - WHERE PublishXRef.`InbredSetId`=InbredSet.`Id` - AND PublishFreeze.`InbredSetId`=InbredSet.`Id` - AND InbredSet.`SpeciesId`=Species.`Id` + PublishXRef.`mean`, + PublishFreeze.Id, + Geno.Chr as geno_chr, + Geno.Mb as geno_mb + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id` + INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId`=InbredSet.`Id` + INNER JOIN PublishXRef ON PublishXRef.`InbredSetId`=InbredSet.`Id` + INNER JOIN Phenotype ON PublishXRef.`PhenotypeId`=Phenotype.`Id` + INNER JOIN Publication ON PublishXRef.`PublicationId`=Publication.`Id` + LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id + WHERE + ( + (MATCH (Phenotype.Post_publication_description, Phenotype.Pre_publication_description, Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code) AGAINST ('{1}' IN BOOLEAN MODE) ) + OR (MATCH (Publication.Abstract, Publication.Title, Publication.Authors) AGAINST ('{1}' IN BOOLEAN MODE) ) + ) {0} - AND PublishXRef.`PhenotypeId`=Phenotype.`Id` - AND PublishXRef.`PublicationId`=Publication.`Id` - AND (Phenotype.Post_publication_description REGEXP "[[:<:]]{1}[[:>:]]" - OR Phenotype.Pre_publication_description REGEXP "[[:<:]]{1}[[:>:]]" - OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]" - OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]" - OR Phenotype.Lab_code REGEXP "[[:<:]]{1}[[:>:]]" - OR Publication.PubMed_ID REGEXP "[[:<:]]{1}[[:>:]]" - OR Publication.Abstract REGEXP "[[:<:]]{1}[[:>:]]" - OR Publication.Title REGEXP "[[:<:]]{1}[[:>:]]" - OR Publication.Authors REGEXP "[[:<:]]{1}[[:>:]]" - OR PublishXRef.Id REGEXP "[[:<:]]{1}[[:>:]]") ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id` LIMIT 6000 """.format(group_clause, search_term) @@ -191,58 +218,73 @@ class GSearch(object): this_trait['index'] = i + 1 this_trait['name'] = str(line[4]) if len(str(line[12])) == 3: - this_trait['display_name'] = str(line[12]) + "_" + this_trait['name'] + this_trait['display_name'] = str( + line[12]) + "_" + this_trait['name'] else: this_trait['display_name'] = this_trait['name'] this_trait['dataset'] = line[2] this_trait['dataset_fullname'] = line[3] - this_trait['hmac'] = hmac.data_hmac('{}:{}'.format(line[4], line[2])) + this_trait['hmac'] = hmac.data_hmac( + '{}:{}'.format(line[4], line[2])) this_trait['species'] = line[0] this_trait['group'] = line[1] if line[9] != None and line[6] != None: - this_trait['description'] = line[6].decode('utf-8', 'replace') + this_trait['description'] = line[6].decode( + 'utf-8', 'replace') elif line[5] != None: - this_trait['description'] = line[5].decode('utf-8', 'replace') + this_trait['description'] = line[5].decode( + 'utf-8', 'replace') else: this_trait['description'] = "N/A" - if line[13] != None and line[13] != "": + this_trait['dataset_id'] = line[14] + + this_trait['LRS_score_repr'] = "N/A" + this_trait['additive'] = "N/A" + this_trait['mean'] = "N/A" + + if line[10] != "" and line[10] != None: + this_trait['LRS_score_repr'] = f"{line[10]:.3f}" + # Some Max LRS values in the DB are wrongly listed as 0.000, but shouldn't be displayed + if this_trait['LRS_score_repr'] == "0.000": + this_trait['LRS_score_repr'] = "N/A" + if line[11] != "" and line[11] != None: + this_trait['additive'] = f"{line[11]:.3f}" + if line[13] != "" and line[13] != None: this_trait['mean'] = f"{line[13]:.3f}" - else: - this_trait['mean'] = "N/A" + + locus_chr = line[15] + locus_mb = line[16] + + max_lrs_text = "N/A" + if locus_chr and locus_mb: + max_lrs_text = f"Chr{locus_chr}: {locus_mb}" + this_trait['max_lrs_text'] = max_lrs_text + this_trait['authors'] = line[7] this_trait['year'] = line[8] + this_trait['pubmed_text'] = "N/A" + this_trait['pubmed_link'] = "N/A" if this_trait['year'].isdigit(): this_trait['pubmed_text'] = this_trait['year'] - else: - this_trait['pubmed_text'] = "N/A" if line[9] != "" and line[9] != None: this_trait['pubmed_link'] = webqtlConfig.PUBMEDLINK_URL % line[8] - else: - this_trait['pubmed_link'] = "N/A" if line[12]: - this_trait['display_name'] = line[12] + "_" + str(this_trait['name']) - this_trait['LRS_score_repr'] = "N/A" - if line[10] != "" and line[10] != None: - this_trait['LRS_score_repr'] = '%3.1f' % line[10] - this_trait['additive'] = "N/A" - if line[11] != "" and line[11] != None: - this_trait['additive'] = '%.3f' % line[11] - - this_trait['max_lrs_text'] = "N/A" - trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) - if not trait_ob: - continue - if this_trait['dataset'] == this_trait['group'] + "Publish": - try: - if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": - this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) - except: - this_trait['max_lrs_text'] = "N/A" + this_trait['display_name'] = line[12] + \ + "_" + str(this_trait['name']) + + dataset_ob = SimpleNamespace(id=this_trait["dataset_id"], type="Publish", species=this_trait["species"]) + permissions = check_resource_availability(dataset_ob, this_trait['name']) + if type(permissions['data']) is list: + if "view" not in permissions['data']: + continue + else: + if permissions['data'] == 'no-access': + continue trait_list.append(this_trait) self.trait_count = len(trait_list) - self.trait_list = json.dumps(trait_list) + self.trait_list = trait_list self.header_fields = ['Index', 'Species', @@ -254,3 +296,19 @@ class GSearch(object): 'Max LRS', 'Max LRS Location', 'Additive Effect'] + + self.header_data_names = [ + 'index', + 'name', + 'species', + 'group', + 'tissue', + 'dataset_fullname', + 'symbol', + 'description', + 'location_repr', + 'mean', + 'LRS_score_repr', + 'max_lrs_text', + 'additive', + ] diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index cca5a4fc..001bab3b 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -12,19 +12,21 @@ from utility.logger import getLogger Redis = Redis() -logger = getLogger(__name__ ) +logger = getLogger(__name__) -class Heatmap(object): + +class Heatmap: def __init__(self, start_vars, temp_uuid): - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + trait_db_list = [trait.strip() + for trait in start_vars['trait_list'].split(',')] helper_functions.get_trait_db_obs(self, trait_db_list) self.temp_uuid = temp_uuid self.num_permutations = 5000 self.dataset = self.trait_list[0][1] - self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure + self.json_data = {} # The dictionary that will be used to create the json object that contains all the data needed to create the figure self.all_sample_list = [] self.traits = [] @@ -32,7 +34,8 @@ class Heatmap(object): chrnames = [] self.species = species.TheSpecies(dataset=self.trait_list[0][1]) for key in list(self.species.chromosomes.chromosomes.keys()): - chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) + chrnames.append([self.species.chromosomes.chromosomes[key].name, + self.species.chromosomes.chromosomes[key].mb_length]) for trait_db in self.trait_list: @@ -83,7 +86,7 @@ class Heatmap(object): self.json_data[trait] = self.trait_results[trait] self.js_data = dict( - json_data = self.json_data + json_data=self.json_data ) def gen_reaper_results(self): @@ -107,19 +110,22 @@ class Heatmap(object): trimmed_samples.append(str(samples[i])) trimmed_values.append(values[i]) - trait_filename = str(this_trait.name) + "_" + str(self.dataset.name) + "_pheno" + trait_filename = str(this_trait.name) + "_" + \ + str(self.dataset.name) + "_pheno" gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename) - output_filename = self.dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + output_filename = self.dataset.group.name + "_GWA_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename) + genofile_name, + TEMPDIR, + trait_filename, + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename) - os.system(reaper_command) + os.system(reaper_command) reaper_results = parse_reaper_output(output_filename) @@ -128,9 +134,12 @@ class Heatmap(object): self.trait_results[this_trait.name] = [] for qtl in reaper_results: if qtl['additive'] > 0: - self.trait_results[this_trait.name].append(-float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + -float(qtl['lrs_value'])) else: - self.trait_results[this_trait.name].append(float(qtl['lrs_value'])) + self.trait_results[this_trait.name].append( + float(qtl['lrs_value'])) + def gen_pheno_txt_file(samples, vals, filename): """Generates phenotype file for GEMMA""" @@ -151,6 +160,7 @@ def gen_pheno_txt_file(samples, vals, filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename): included_markers = [] p_values = [] diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index d0dd7aea..5e86ae31 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -2,30 +2,32 @@ import string from flask import Flask, g -#Just return a list of dictionaries -#each dictionary contains sub-dictionary +# Just return a list of dictionaries +# each dictionary contains sub-dictionary + + def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): - fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', - 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', - 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] - - ##List All Species in the Gene Table - speciesDict = {} - results = g.db.execute(""" + fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', + 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', + 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] + + # List All Species in the Gene Table + speciesDict = {} + results = g.db.execute(""" SELECT Species.Name, GeneList.SpeciesId FROM Species, GeneList WHERE GeneList.SpeciesId = Species.Id GROUP BY GeneList.SpeciesId""").fetchall() - for item in results: - speciesDict[item[0]] = item[1] - - ##List current Species and other Species - speciesId = speciesDict[species] - otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] - otherSpecies.remove([species, speciesId]) + for item in results: + speciesDict[item[0]] = item[1] - results = g.db.execute(""" + # List current Species and other Species + speciesId = speciesDict[species] + otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] + otherSpecies.remove([species, speciesId]) + + results = g.db.execute(""" SELECT %s FROM GeneList WHERE SpeciesId = %d AND Chromosome = '%s' AND @@ -36,47 +38,49 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): startMb, endMb, startMb, endMb)).fetchall() - GeneList = [] + GeneList = [] - if results: - for result in results: - newdict = {} - for j, item in enumerate(fetchFields): - newdict[item] = result[j] - #count SNPs if possible - if diffCol and species=='mouse': - newdict["snpCount"] = g.db.execute(""" + if results: + for result in results: + newdict = {} + for j, item in enumerate(fetchFields): + newdict[item] = result[j] + # count SNPs if possible + if diffCol and species == 'mouse': + newdict["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict["snpDensity"] = newdict["snpCount"]/(newdict["TxEnd"]-newdict["TxStart"])/1000.0 - else: - newdict["snpDensity"] = newdict["snpCount"] = 0 - - try: - newdict['GeneLength'] = 1000.0*(newdict['TxEnd'] - newdict['TxStart']) - except: - pass - - #load gene from other Species by the same name - for item in otherSpecies: - othSpec, othSpecId = item - newdict2 = {} - - resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), + newdict["snpDensity"] = newdict["snpCount"] / \ + (newdict["TxEnd"] - newdict["TxStart"]) / 1000.0 + else: + newdict["snpDensity"] = newdict["snpCount"] = 0 + + try: + newdict['GeneLength'] = 1000.0 * \ + (newdict['TxEnd'] - newdict['TxStart']) + except: + pass + + # load gene from other Species by the same name + for item in otherSpecies: + othSpec, othSpecId = item + newdict2 = {} + + resultsOther = g.db.execute("SELECT %s FROM GeneList WHERE SpeciesId = %d AND geneSymbol= '%s' LIMIT 1" % (", ".join(fetchFields), othSpecId, newdict["GeneSymbol"])).fetchone() - if resultsOther: - for j, item in enumerate(fetchFields): - newdict2[item] = resultsOther[j] - - #count SNPs if possible, could be a separate function - if diffCol and othSpec == 'mouse': - newdict2["snpCount"] = g.db.execute(""" + if resultsOther: + for j, item in enumerate(fetchFields): + newdict2[item] = resultsOther[j] + + # count SNPs if possible, could be a separate function + if diffCol and othSpec == 'mouse': + newdict2["snpCount"] = g.db.execute(""" SELECT count(*) FROM BXDSnpPosition WHERE Chr = '%s' AND @@ -84,19 +88,19 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): StrainId1 = %d AND StrainId2 = %d """ % (chrName, newdict["TxStart"], newdict["TxEnd"], diffCol[0], diffCol[1])).fetchone()[0] - newdict2["snpDensity"] = newdict2["snpCount"]/(newdict2["TxEnd"]-newdict2["TxStart"])/1000.0 - else: - newdict2["snpDensity"] = newdict2["snpCount"] = 0 - - try: - newdict2['GeneLength'] = 1000.0*(newdict2['TxEnd'] - newdict2['TxStart']) - except: - pass - - newdict['%sGene' % othSpec] = newdict2 - - GeneList.append(newdict) + newdict2["snpDensity"] = newdict2["snpCount"] / \ + (newdict2["TxEnd"] - newdict2["TxStart"]) / 1000.0 + else: + newdict2["snpDensity"] = newdict2["snpCount"] = 0 + + try: + newdict2['GeneLength'] = 1000.0 * \ + (newdict2['TxEnd'] - newdict2['TxStart']) + except: + pass - return GeneList + newdict['%sGene' % othSpec] = newdict2 + GeneList.append(newdict) + return GeneList diff --git a/wqflask/wqflask/markdown_routes.py b/wqflask/wqflask/markdown_routes.py index ebf75807..b81bfb55 100644 --- a/wqflask/wqflask/markdown_routes.py +++ b/wqflask/wqflask/markdown_routes.py @@ -2,17 +2,21 @@ Render pages from github, or if they are unavailable, look for it else where """ + import requests import markdown import os import sys -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup # type: ignore from flask import send_from_directory from flask import Blueprint from flask import render_template +from typing import Dict +from typing import List + glossary_blueprint = Blueprint('glossary_blueprint', __name__) references_blueprint = Blueprint("references_blueprint", __name__) environments_blueprint = Blueprint("environments_blueprint", __name__) @@ -20,6 +24,8 @@ links_blueprint = Blueprint("links_blueprint", __name__) policies_blueprint = Blueprint("policies_blueprint", __name__) facilities_blueprint = Blueprint("facilities_blueprint", __name__) +blogs_blueprint = Blueprint("blogs_blueprint", __name__) + def render_markdown(file_name, is_remote_file=True): """Try to fetch the file name from Github and if that fails, try to @@ -55,6 +61,40 @@ def get_file_from_python_search_path(pathname_suffix): return None +def get_blogs(user: str = "genenetwork", + repo_name: str = "gn-docs") -> dict: + + blogs: Dict[int, List] = {} + github_url = f"https://api.github.com/repos/{user}/{repo_name}/git/trees/master?recursive=1" + + repo_tree = requests.get(github_url).json()["tree"] + + for data in repo_tree: + path_name = data["path"] + if path_name.startswith("blog") and path_name.endswith(".md"): + split_path = path_name.split("/")[1:] + try: + year, title, file_name = split_path + except Exception as e: + year, file_name = split_path + title = "" + + subtitle = os.path.splitext(file_name)[0] + + blog = { + "title": title, + "subtitle": subtitle, + "full_path": path_name + } + + if year in blogs: + blogs[int(year)].append(blog) + else: + blogs[int(year)] = [blog] + + return dict(sorted(blogs.items(), key=lambda x: x[0], reverse=True)) + + @glossary_blueprint.route('/') def glossary(): return render_template( @@ -103,7 +143,7 @@ def environments(): @environments_blueprint.route('/svg-dependency-graph') def svg_graph(): directory, file_name, _ = get_file_from_python_search_path( - "wqflask/dependency-graph.svg").partition("dependency-graph.svg") + "wqflask/dependency-graph.svg").partition("dependency-graph.svg") return send_from_directory(directory, file_name) @@ -124,3 +164,15 @@ def policies(): @facilities_blueprint.route("/") def facilities(): return render_template("facilities.html", rendered_markdown=render_markdown("general/help/facilities.md")), 200 + + +@blogs_blueprint.route("/<path:blog_path>") +def display_blog(blog_path): + return render_template("blogs.html", rendered_markdown=render_markdown(blog_path)) + + +@blogs_blueprint.route("/") +def blogs_list(): + blogs = get_blogs() + + return render_template("blogs_list.html", blogs=blogs) diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 6a5fe2f6..ec17d3b0 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -51,7 +51,7 @@ try: # Only import this for Python3 from functools import reduce except: pass -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) RED = ImageColor.getrgb("red") BLUE = ImageColor.getrgb("blue") @@ -152,7 +152,7 @@ class HtmlGenWrapper: return map_ -class DisplayMappingResults(object): +class DisplayMappingResults: """Inteval Mapping Plot Page""" cMGraphInterval = 5 GRAPH_MIN_WIDTH = 900 @@ -307,7 +307,8 @@ class DisplayMappingResults(object): if 'color_scheme' in start_vars: self.color_scheme = start_vars['color_scheme'] if self.color_scheme == "single": - self.manhattan_single_color = ImageColor.getrgb("#" + start_vars['manhattan_single_color']) + self.manhattan_single_color = ImageColor.getrgb( + "#" + start_vars['manhattan_single_color']) if 'permCheck' in list(start_vars.keys()): self.permChecked = start_vars['permCheck'] @@ -357,7 +358,8 @@ class DisplayMappingResults(object): if 'reaper_version' in list(start_vars.keys()) and self.mapping_method == "reaper": self.reaper_version = start_vars['reaper_version'] if 'output_files' in start_vars: - self.output_files = ",".join([(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) + self.output_files = ",".join( + [(the_file if the_file is not None else "") for the_file in start_vars['output_files']]) self.categorical_vars = "" self.perm_strata = "" @@ -386,28 +388,31 @@ class DisplayMappingResults(object): self.dataset.group.genofile = self.genofile_string.split(":")[0] if self.mapping_method == "reaper" and self.manhattan_plot != True: - self.genotype = self.dataset.group.read_genotype_file(use_reaper=True) + self.genotype = self.dataset.group.read_genotype_file( + use_reaper=True) else: self.genotype = self.dataset.group.read_genotype_file() - #Darwing Options + # Darwing Options try: - if self.selectedChr > -1: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) - else: - self.graphWidth = min(self.GRAPH_MAX_WIDTH, max(self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + if self.selectedChr > -1: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) + else: + self.graphWidth = min(self.GRAPH_MAX_WIDTH, max( + self.MULT_GRAPH_MIN_WIDTH, int(start_vars['graphWidth']))) except: - if self.selectedChr > -1: - self.graphWidth = self.GRAPH_DEFAULT_WIDTH - else: - self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH + if self.selectedChr > -1: + self.graphWidth = self.GRAPH_DEFAULT_WIDTH + else: + self.graphWidth = self.MULT_GRAPH_DEFAULT_WIDTH -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if 'haplotypeAnalystCheck' in list(start_vars.keys()): self.haplotypeAnalystChecked = start_vars['haplotypeAnalystCheck'] else: self.haplotypeAnalystChecked = False -## END HaplotypeAnalyst +# END HaplotypeAnalyst self.graphHeight = self.GRAPH_DEFAULT_HEIGHT self.dominanceChecked = False @@ -446,7 +451,7 @@ class DisplayMappingResults(object): except: self.lrsMax = 0 - #Trait Infos + # Trait Infos self.identification = "" ################################################################ @@ -471,10 +476,12 @@ class DisplayMappingResults(object): Chr_Length.OrderId """ % (self.dataset.group.name, ", ".join(["'%s'" % X[0] for X in self.ChrList[1:]]))) - self.ChrLengthMbList = [x[0]/1000000.0 for x in self.ChrLengthMbList] - self.ChrLengthMbSum = reduce(lambda x, y:x+y, self.ChrLengthMbList, 0.0) + self.ChrLengthMbList = [x[0] / 1000000.0 for x in self.ChrLengthMbList] + self.ChrLengthMbSum = reduce( + lambda x, y: x + y, self.ChrLengthMbList, 0.0) if self.ChrLengthMbList: - self.MbGraphInterval = self.ChrLengthMbSum/(len(self.ChrLengthMbList)*12) #Empirical Mb interval + self.MbGraphInterval = self.ChrLengthMbSum / \ + (len(self.ChrLengthMbList) * 12) # Empirical Mb interval else: self.MbGraphInterval = 1 @@ -482,38 +489,38 @@ class DisplayMappingResults(object): for i, _chr in enumerate(self.genotype): self.ChrLengthCMList.append(_chr[-1].cM - _chr[0].cM) - self.ChrLengthCMSum = reduce(lambda x, y:x+y, self.ChrLengthCMList, 0.0) + self.ChrLengthCMSum = reduce( + lambda x, y: x + y, self.ChrLengthCMList, 0.0) if self.plotScale == 'physic': - self.GraphInterval = self.MbGraphInterval #Mb + self.GraphInterval = self.MbGraphInterval # Mb else: - self.GraphInterval = self.cMGraphInterval #cM + self.GraphInterval = self.cMGraphInterval # cM -## BEGIN HaplotypeAnalyst -## count the amount of individuals to be plotted, and increase self.graphHeight +# BEGIN HaplotypeAnalyst +# count the amount of individuals to be plotted, and increase self.graphHeight if self.haplotypeAnalystChecked and self.selectedChr > -1: thisTrait = self.this_trait - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x": - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue samplelist = list(self.genotype.prgy) - for j, _geno in enumerate (self.genotype[0][1].genotype): + for j, _geno in enumerate(self.genotype[0][1].genotype): for item in smd: if item.name == samplelist[j]: self.NR_INDIVIDUALS = self.NR_INDIVIDUALS + 1 # default: - self.graphHeight = self.graphHeight + 2 * (self.NR_INDIVIDUALS+10) * self.EACH_GENE_HEIGHT -## END HaplotypeAnalyst - - - + self.graphHeight = self.graphHeight + 2 * \ + (self.NR_INDIVIDUALS + 10) * self.EACH_GENE_HEIGHT +# END HaplotypeAnalyst ######################### - ## Get the sorting column + # Get the sorting column ######################### RISet = self.dataset.group.name if RISet in ('AXB', 'BXA', 'AXBXA'): @@ -529,10 +536,11 @@ class DisplayMappingResults(object): elif RISet in ('LXS'): self.diffCol = ['ILS', 'ISS'] else: - self.diffCol= [] + self.diffCol = [] for i, strain in enumerate(self.diffCol): - self.diffCol[i] = g.db.execute("select Id from Strain where Symbol = %s", strain).fetchone()[0] + self.diffCol[i] = g.db.execute( + "select Id from Strain where Symbol = %s", strain).fetchone()[0] ################################################################ # GeneCollection goes here @@ -546,7 +554,7 @@ class DisplayMappingResults(object): geneTable = "" self.geneCol = None - if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): + if self.plotScale == 'physic' and self.selectedChr > -1 and (self.intervalAnalystChecked or self.geneChecked): # Draw the genes for this chromosome / region of this chromosome webqtldatabase = self.dataset.name @@ -555,24 +563,26 @@ class DisplayMappingResults(object): chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "mouse") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "mouse") elif self.dataset.group.species == "rat": if self.selectedChr == 21: chrName = "X" else: chrName = self.selectedChr - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, "rat") + self.geneCol = GeneUtil.loadGenes( + chrName, self.diffCol, self.startMb, self.endMb, "rat") if self.geneCol and self.intervalAnalystChecked: - ####################################################################### - #Nick use GENEID as RefGene to get Literature Correlation Informations# - #For Interval Mapping, Literature Correlation isn't useful, so skip it# - #through set GENEID is None # - ####################################################################### + ####################################################################### + #Nick use GENEID as RefGene to get Literature Correlation Informations# + #For Interval Mapping, Literature Correlation isn't useful, so skip it# + #through set GENEID is None # + ####################################################################### - GENEID = None + GENEID = None - self.geneTable(self.geneCol, GENEID) + self.geneTable(self.geneCol, GENEID) ################################################################ # Plots goes here @@ -580,11 +590,12 @@ class DisplayMappingResults(object): showLocusForm = "" intCanvas = Image.new("RGBA", size=(self.graphWidth, self.graphHeight)) with Bench("Drawing Plot"): - gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) + gifmap = self.plotIntMapping( + intCanvas, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm) self.gifmap = gifmap.__str__() - self.filename= webqtlUtil.genRandStr("Itvl_") + self.filename = webqtlUtil.genRandStr("Itvl_") intCanvas.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename)), @@ -594,20 +605,22 @@ class DisplayMappingResults(object): border="0", usemap='#WebQTLImageMap' ) - #Scales plot differently for high resolution + # Scales plot differently for high resolution if self.draw2X: - intCanvasX2 = Image.new("RGBA", size=(self.graphWidth*2, self.graphHeight*2)) - gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2) + intCanvasX2 = Image.new("RGBA", size=( + self.graphWidth * 2, self.graphHeight * 2)) + gifmapX2 = self.plotIntMapping( + intCanvasX2, startMb=self.startMb, endMb=self.endMb, showLocusForm=showLocusForm, zoom=2) intCanvasX2.save( "{}.png".format( os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, - self.filename+"X2")), + self.filename + "X2")), format='png') ################################################################ # Outputs goes here ################################################################ - #this form is used for opening Locus page or trait page, only available for genetic mapping + # this form is used for opening Locus page or trait page, only available for genetic mapping if showLocusForm: showLocusForm = HtmlGenWrapper.create_form_tag( cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), @@ -615,7 +628,8 @@ class DisplayMappingResults(object): name=showLocusForm, submit=HtmlGenWrapper.create_input_tag(type_='hidden')) - hddn = {'FormID':'showDatabase', 'ProbeSetID':'_','database':fd.RISet+"Geno",'CellID':'_', 'RISet':fd.RISet, 'incparentsf1':'ON'} + hddn = {'FormID': 'showDatabase', 'ProbeSetID': '_', 'database': fd.RISet + \ + "Geno", 'CellID': '_', 'RISet': fd.RISet, 'incparentsf1': 'ON'} for key in hddn.keys(): showLocusForm.append(HtmlGenWrapper.create_input_tag( name=key, value=hddn[key], type_='hidden')) @@ -634,11 +648,12 @@ class DisplayMappingResults(object): if self.traitList and self.traitList[0].dataset and self.traitList[0].dataset.type == 'Geno': btminfo.append(HtmlGenWrapper.create_br_tag()) - btminfo.append('Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') + btminfo.append( + 'Mapping using genotype data as a trait will result in infinity LRS at one locus. In order to display the result properly, all LRSs higher than 100 are capped at 100.') - def plotIntMapping(self, canvas, offset= (80, 120, 90, 100), zoom = 1, startMb = None, endMb = None, showLocusForm = ""): + def plotIntMapping(self, canvas, offset=(80, 120, 90, 100), zoom=1, startMb=None, endMb=None, showLocusForm=""): im_drawer = ImageDraw.Draw(canvas) - #calculating margins + # calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset if self.multipleInterval: yTopOffset = max(90, yTopOffset) @@ -659,34 +674,36 @@ class DisplayMappingResults(object): xLeftOffset += 20 fontZoom = 1.5 - xLeftOffset = int(xLeftOffset*fontZoom) - xRightOffset = int(xRightOffset*fontZoom) - yBottomOffset = int(yBottomOffset*fontZoom) + xLeftOffset = int(xLeftOffset * fontZoom) + xRightOffset = int(xRightOffset * fontZoom) + yBottomOffset = int(yBottomOffset * fontZoom) cWidth = canvas.size[0] cHeight = canvas.size[1] plotWidth = cWidth - xLeftOffset - xRightOffset plotHeight = cHeight - yTopOffset - yBottomOffset - #Drawing Area Height + # Drawing Area Height drawAreaHeight = plotHeight if self.plotScale == 'physic' and self.selectedChr > -1: if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - drawAreaHeight -= 4*self.BAND_HEIGHT + 4*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING + 10 * zoom else: - drawAreaHeight -= 3*self.BAND_HEIGHT + 3*self.BAND_SPACING+ 10*zoom + drawAreaHeight -= 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom if self.geneChecked: - drawAreaHeight -= self.NUM_GENE_ROWS*self.EACH_GENE_HEIGHT + 3*self.BAND_SPACING + 10*zoom + drawAreaHeight -= self.NUM_GENE_ROWS * \ + self.EACH_GENE_HEIGHT + 3 * self.BAND_SPACING + 10 * zoom else: if self.selectedChr > -1: drawAreaHeight -= 20 else: drawAreaHeight -= 30 -## BEGIN HaplotypeAnalyst +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked and self.selectedChr > -1: - drawAreaHeight -= self.EACH_GENE_HEIGHT * (self.NR_INDIVIDUALS+10) * 2 * zoom -## END HaplotypeAnalyst + drawAreaHeight -= self.EACH_GENE_HEIGHT * \ + (self.NR_INDIVIDUALS + 10) * 2 * zoom +# END HaplotypeAnalyst if zoom == 2: drawAreaHeight -= 60 @@ -696,42 +713,52 @@ class DisplayMappingResults(object): newoffset = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset) # Draw the alternating-color background first and get plotXScale - plotXScale = self.drawGraphBackground(canvas, gifmap, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + plotXScale = self.drawGraphBackground( + canvas, gifmap, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw bootstap + # draw bootstap if self.bootChecked and not self.multipleInterval: - self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawBootStrapResult(canvas, self.nboot, drawAreaHeight, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw clickable region and gene band if selected if self.plotScale == 'physic' and self.selectedChr > -1: - self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawClickBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.geneChecked and self.geneCol: - self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawGeneBand(canvas, gifmap, plotXScale, offset=newoffset, + zoom=zoom, startMb=startMb, endMb=endMb) if self.SNPChecked: - self.drawSNPTrackNew(canvas, offset=newoffset, zoom = 2*zoom, startMb=startMb, endMb = endMb) -## BEGIN HaplotypeAnalyst + self.drawSNPTrackNew( + canvas, offset=newoffset, zoom=2 * zoom, startMb=startMb, endMb=endMb) +# BEGIN HaplotypeAnalyst if self.haplotypeAnalystChecked: - self.drawHaplotypeBand(canvas, gifmap, plotXScale, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) -## END HaplotypeAnalyst + self.drawHaplotypeBand( + canvas, gifmap, plotXScale, offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) +# END HaplotypeAnalyst # Draw X axis - self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=newoffset, zoom = zoom, startMb=startMb, endMb = endMb) + self.drawXAxis(canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) # Draw QTL curve - self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, offset=newoffset, zoom= zoom, startMb=startMb, endMb = endMb) + self.drawQTL(canvas, drawAreaHeight, gifmap, plotXScale, + offset=newoffset, zoom=zoom, startMb=startMb, endMb=endMb) - #draw legend + # draw legend if self.multipleInterval: - self.drawMultiTraitName(fd, canvas, gifmap, showLocusForm, offset=newoffset) + self.drawMultiTraitName( + fd, canvas, gifmap, showLocusForm, offset=newoffset) elif self.legendChecked: - self.drawLegendPanel(canvas, offset=newoffset, zoom = zoom) + self.drawLegendPanel(canvas, offset=newoffset, zoom=zoom) else: pass - #draw position, no need to use a separate function - self.drawProbeSetPosition(canvas, plotXScale, offset=newoffset, zoom = zoom) + # draw position, no need to use a separate function + self.drawProbeSetPosition( + canvas, plotXScale, offset=newoffset, zoom=zoom) return gifmap - def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawBootStrapResult(self, canvas, nboot, drawAreaHeight, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -741,9 +768,9 @@ class DisplayMappingResults(object): if zoom == 2: fontZoom = 1.5 - bootHeightThresh = drawAreaHeight*3/4 + bootHeightThresh = drawAreaHeight * 3 / 4 - #break bootstrap result into groups + # break bootstrap result into groups BootCoord = [] i = 0 previous_chr = None @@ -751,7 +778,7 @@ class DisplayMappingResults(object): startX = xLeftOffset BootChrCoord = [] - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, result in enumerate(self.qtlresults): if result['chr'] != previous_chr: previous_chr = result['chr'] @@ -759,28 +786,33 @@ class DisplayMappingResults(object): if previous_chr_as_int != 1: BootCoord.append(BootChrCoord) BootChrCoord = [] - startX += (self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval)*plotXScale + startX += ( + self.ChrLengthDistList[previous_chr_as_int - 2] + self.GraphInterval) * plotXScale if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0]['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) else: for i, result in enumerate(self.qtlresults): if str(result['chr']) == str(self.ChrList[self.selectedChr][0]): if self.plotScale == 'physic': - Xc = startX + (result['Mb']-self.startMb)*plotXScale + Xc = startX + (result['Mb'] - \ + self.startMb) * plotXScale else: - Xc = startX + (result['cM']-self.qtlresults[0]['cM'])*plotXScale + Xc = startX + \ + (result['cM'] - self.qtlresults[0] + ['cM']) * plotXScale BootChrCoord.append([Xc, self.bootResult[i]]) BootCoord = [BootChrCoord] - #reduce bootResult + # reduce bootResult if self.selectedChr > -1: maxBootBar = 80.0 else: maxBootBar = 200.0 - stepBootStrap = plotWidth/maxBootBar + stepBootStrap = plotWidth / maxBootBar reducedBootCoord = [] maxBootCount = 0 @@ -796,14 +828,16 @@ class DisplayMappingResults(object): if maxBootCount < bootCount: maxBootCount = bootCount # end if - reducedBootCoord.append([bootStartPixX, BootChrCoord[i][0], bootCount]) + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[i][0], bootCount]) bootStartPixX = BootChrCoord[i][0] bootCount = BootChrCoord[i][1] # end else # end for - #add last piece - if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap/2.0: - reducedBootCoord.append([bootStartPixX, BootChrCoord[-1][0], bootCount]) + # add last piece + if BootChrCoord[-1][0] - bootStartPixX > stepBootStrap / 2.0: + reducedBootCoord.append( + [bootStartPixX, BootChrCoord[-1][0], bootCount]) else: reducedBootCoord[-1][2] += bootCount reducedBootCoord[-1][1] = BootChrCoord[-1][0] @@ -815,64 +849,68 @@ class DisplayMappingResults(object): if item[2] > 0: if item[0] < xLeftOffset: item[0] = xLeftOffset - if item[0] > xLeftOffset+plotWidth: - item[0] = xLeftOffset+plotWidth + if item[0] > xLeftOffset + plotWidth: + item[0] = xLeftOffset + plotWidth if item[1] < xLeftOffset: item[1] = xLeftOffset - if item[1] > xLeftOffset+plotWidth: - item[1] = xLeftOffset+plotWidth + if item[1] > xLeftOffset + plotWidth: + item[1] = xLeftOffset + plotWidth if item[0] != item[1]: im_drawer.rectangle( xy=((item[0], yZero), - (item[1], yZero - item[2]*bootHeightThresh/maxBootCount)), + (item[1], yZero - item[2] * bootHeightThresh / maxBootCount)), fill=self.BOOTSTRAP_BOX_COLOR, outline=BLACK) - ###draw boot scale - highestPercent = (maxBootCount*100.0)/nboot + # draw boot scale + highestPercent = (maxBootCount * 100.0) / nboot bootScale = Plot.detScale(0, highestPercent) - bootScale = Plot.frange(bootScale[0], bootScale[1], bootScale[1]/bootScale[2]) + bootScale = Plot.frange( + bootScale[0], bootScale[1], bootScale[1] / bootScale[2]) bootScale = bootScale[:-1] + [highestPercent] - bootOffset = 50*fontZoom - bootScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=13*fontZoom) + bootOffset = 50 * fontZoom + bootScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=13 * fontZoom) im_drawer.rectangle( - xy=((canvas.size[0]-bootOffset, yZero-bootHeightThresh), - (canvas.size[0]-bootOffset-15*zoom, yZero)), - fill = YELLOW, outline=BLACK) + xy=((canvas.size[0] - bootOffset, yZero - bootHeightThresh), + (canvas.size[0] - bootOffset - 15 * zoom, yZero)), + fill=YELLOW, outline=BLACK) im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, yZero), - (canvas.size[0]-bootOffset, yZero)), + xy=((canvas.size[0] - bootOffset + 4, yZero), + (canvas.size[0] - bootOffset, yZero)), fill=BLACK) TEXT_Y_DISPLACEMENT = -8 - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, yZero+TEXT_Y_DISPLACEMENT), text='0%', + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, yZero + TEXT_Y_DISPLACEMENT), text='0%', font=bootScaleFont, fill=BLACK) for item in bootScale: if item == 0: continue - bootY = yZero-bootHeightThresh*item/highestPercent + bootY = yZero - bootHeightThresh * item / highestPercent im_drawer.line( - xy=((canvas.size[0]-bootOffset+4, bootY), - (canvas.size[0]-bootOffset, bootY)), + xy=((canvas.size[0] - bootOffset + 4, bootY), + (canvas.size[0] - bootOffset, bootY)), fill=BLACK) - im_drawer.text(xy=(canvas.size[0]-bootOffset+10, bootY+TEXT_Y_DISPLACEMENT), - text='%2.1f'%item, font=bootScaleFont, fill=BLACK) + im_drawer.text(xy=(canvas.size[0] - bootOffset + 10, bootY + TEXT_Y_DISPLACEMENT), + text='%2.1f' % item, font=bootScaleFont, fill=BLACK) if self.legendChecked: if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 30 else: startPosY = 15 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 im_drawer.rectangle( - xy=((leftOffset, startPosY-6), (leftOffset+12, startPosY+6)), + xy=((leftOffset, startPosY - 6), + (leftOffset + 12, startPosY + 6)), fill=YELLOW, outline=BLACK) - im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY+TEXT_Y_DISPLACEMENT), + im_drawer.text(xy=(canvas.size[0] - xRightOffset - 170, startPosY + TEXT_Y_DISPLACEMENT), text='Frequency of the Peak LRS', font=smallLabelFont, fill=BLACK) - def drawProbeSetPosition(self, canvas, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawProbeSetPosition(self, canvas, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if len(self.traitList) != 1: return @@ -896,21 +934,22 @@ class DisplayMappingResults(object): if self.plotScale == "physic": this_chr = str(self.ChrList[self.selectedChr][0]) else: - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.plotScale == 'physic': if self.selectedChr > -1: if this_chr != Chr or Mb < self.startMb or Mb > self.endMb: return else: - locPixel = xLeftOffset + (Mb-self.startMb)*plotXScale + locPixel = xLeftOffset + (Mb - self.startMb) * plotXScale else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList[1:]): if _chr[0] != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += Mb*plotXScale + locPixel += Mb * plotXScale break else: if self.selectedChr > -1: @@ -918,33 +957,37 @@ class DisplayMappingResults(object): if qtlresult['chr'] != self.selectedChr: continue - if i==0 and qtlresult['Mb'] >= Mb: - locPixel=-1 + if i == 0 and qtlresult['Mb'] >= Mb: + locPixel = -1 break - #the trait's position is between two traits - if i > 0 and self.qtlresults[i-1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: - locPixel = xLeftOffset + plotXScale*(self.qtlresults[i-1]['Mb']+(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])*(Mb - self.qtlresults[i-1]['Mb'])/(qtlresult['Mb']-self.qtlresults[i-1]['Mb'])) + # the trait's position is between two traits + if i > 0 and self.qtlresults[i - 1]['Mb'] < Mb and qtlresult['Mb'] >= Mb: + locPixel = xLeftOffset + plotXScale * (self.qtlresults[i - 1]['Mb'] + (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb']) * ( + Mb - self.qtlresults[i - 1]['Mb']) / (qtlresult['Mb'] - self.qtlresults[i - 1]['Mb'])) break - #the trait's position is on the right of the last genotype - if i==len(self.qtlresults) and Mb>=qtlresult['Mb']: + # the trait's position is on the right of the last genotype + if i == len(self.qtlresults) and Mb >= qtlresult['Mb']: locPixel = -1 else: locPixel = xLeftOffset for i, _chr in enumerate(self.ChrList): - if i < (len(self.ChrList)-1): + if i < (len(self.ChrList) - 1): if _chr != Chr: - locPixel += (self.ChrLengthDistList[i] + self.GraphInterval)*plotXScale + locPixel += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale else: - locPixel += (Mb*(_chr[-1].cM-_chr[0].cM)/self.ChrLengthCMList[i])*plotXScale + locPixel += (Mb * (_chr[-1].cM - _chr[0].cM) / \ + self.ChrLengthCMList[i]) * plotXScale break if locPixel >= 0 and self.plotScale == 'physic': - traitPixel = ((locPixel, yZero), (locPixel-7, yZero+14), (locPixel+7, yZero+14)) + traitPixel = ((locPixel, yZero), (locPixel - 7, + yZero + 14), (locPixel + 7, yZero + 14)) draw_open_polygon(canvas, xy=traitPixel, outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR) - def drawSNPTrackNew(self, canvas, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawSNPTrackNew(self, canvas, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.diffCol: return @@ -963,36 +1006,37 @@ class DisplayMappingResults(object): #chrName = self.genotype[0].name chrName = self.ChrList[self.selectedChr][0] - stepMb = (endMb-startMb)/plotWidth + stepMb = (endMb - startMb) / plotWidth strainId1, strainId2 = self.diffCol SNPCounts = [] - while startMb<endMb: + while startMb < endMb: snp_count = g.db.execute(""" select count(*) from BXDSnpPosition where Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND StrainId1 = %d AND StrainId2 = %d - """ % (chrName, startMb, startMb+stepMb, strainId1, strainId2)).fetchone()[0] + """ % (chrName, startMb, startMb + stepMb, strainId1, strainId2)).fetchone()[0] SNPCounts.append(snp_count) startMb += stepMb if (len(SNPCounts) > 0): maxCount = max(SNPCounts) - if maxCount>0: + if maxCount > 0: for i in range(xLeftOffset, xLeftOffset + plotWidth): - snpDensity = float(SNPCounts[i-xLeftOffset]*SNP_HEIGHT_MODIFIER/maxCount) + snpDensity = float( + SNPCounts[i - xLeftOffset] * SNP_HEIGHT_MODIFIER / maxCount) im_drawer.line( - xy=((i, drawSNPLocationY+(snpDensity)*zoom), - (i, drawSNPLocationY-(snpDensity)*zoom)), + xy=((i, drawSNPLocationY + (snpDensity) * zoom), + (i, drawSNPLocationY - (snpDensity) * zoom)), fill=self.SNP_COLOR, width=1) - def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset= (40, 120, 80, 10), zoom = 1): + def drawMultiTraitName(self, fd, canvas, gifmap, showLocusForm, offset=(40, 120, 80, 10), zoom=1): nameWidths = [] yPaddingTop = 10 - colorFont=ImageFont.truetype(font=TREBUC_FILE, size=12) - if len(self.qtlresults) >20 and self.selectedChr > -1: + colorFont = ImageFont.truetype(font=TREBUC_FILE, size=12) + if len(self.qtlresults) > 20 and self.selectedChr > -1: rightShift = 20 rightShiftStep = 60 rectWidth = 10 @@ -1004,7 +1048,7 @@ class DisplayMappingResults(object): for k, thisTrait in enumerate(self.traitList): thisLRSColor = self.colorCollection[k] kstep = k % 4 - if k!=0 and kstep==0: + if k != 0 and kstep == 0: if nameWidths: rightShiftStep = max(nameWidths[-4:]) + rectWidth + 20 rightShift += rightShiftStep @@ -1014,19 +1058,23 @@ class DisplayMappingResults(object): nameWidths.append(nameWidth) im_drawer.rectangle( - xy=((rightShift, yPaddingTop+kstep*15), - (rectWidth+rightShift, yPaddingTop+10+kstep*15)), + xy=((rightShift, yPaddingTop + kstep * 15), + (rectWidth + rightShift, yPaddingTop + 10 + kstep * 15)), fill=thisLRSColor, outline=BLACK) im_drawer.text( - text=name, xy=(rectWidth+2+rightShift, yPaddingTop+10+kstep*15), + text=name, xy=(rectWidth + 2 + rightShift, + yPaddingTop + 10 + kstep * 15), font=colorFont, fill=BLACK) if thisTrait.db: - COORDS = "%d,%d,%d,%d" %(rectWidth+2+rightShift, yPaddingTop+kstep*15, rectWidth+2+rightShift+nameWidth, yPaddingTop+10+kstep*15,) - HREF= "javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm, thisTrait.db.name, thisTrait.name) - Areas = HtmlGenWrapper.create_area_tag(shape='rect', coords=COORDS, href=HREF) - gifmap.append(Areas) ### TODO + COORDS = "%d,%d,%d,%d" % (rectWidth + 2 + rightShift, yPaddingTop + kstep * \ + 15, rectWidth + 2 + rightShift + nameWidth, yPaddingTop + 10 + kstep * 15,) + HREF = "javascript:showDatabase3('%s','%s','%s','');" % ( + showLocusForm, thisTrait.db.name, thisTrait.name) + Areas = HtmlGenWrapper.create_area_tag( + shape='rect', coords=COORDS, href=HREF) + gifmap.append(Areas) # TODO - def drawLegendPanel(self, canvas, offset= (40, 120, 80, 10), zoom = 1): + def drawLegendPanel(self, canvas, offset=(40, 120, 80, 10), zoom=1): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1037,80 +1085,82 @@ class DisplayMappingResults(object): if zoom == 2: fontZoom = 1.5 - labelFont=ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=TREBUC_FILE, size=12 * fontZoom) startPosY = 15 - stepPosY = 12*fontZoom + stepPosY = 12 * fontZoom startPosX = canvas.size[0] - xRightOffset - 415 if hasattr(self.traitList[0], 'chr') and hasattr(self.traitList[0], 'mb'): startPosY = 15 nCol = 2 - smallLabelFont = ImageFont.truetype(font=TREBUC_FILE, size=12*fontZoom) + smallLabelFont = ImageFont.truetype( + font=TREBUC_FILE, size=12 * fontZoom) leftOffset = canvas.size[0] - xRightOffset - 190 draw_open_polygon( canvas, xy=( - (leftOffset + 6, startPosY-7), - (leftOffset - 1, startPosY+7), - (leftOffset + 13, startPosY+7)), + (leftOffset + 6, startPosY - 7), + (leftOffset - 1, startPosY + 7), + (leftOffset + 13, startPosY + 7)), outline=BLACK, fill=self.TRANSCRIPT_LOCATION_COLOR ) TEXT_Y_DISPLACEMENT = -8 im_drawer.text( text="Sequence Site", - xy=(leftOffset + 20, startPosY+TEXT_Y_DISPLACEMENT), font=smallLabelFont, + xy=(leftOffset + 20, startPosY + TEXT_Y_DISPLACEMENT), font=smallLabelFont, fill=self.TOP_RIGHT_INFO_COLOR) if self.manhattan_plot != True: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+32, startPosY)), + xy=((startPosX, startPosY), (startPosX + 32, startPosY)), fill=self.LRS_COLOR, width=2) im_drawer.text( - text=self.LRS_LOD, xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text=self.LRS_LOD, xy=( + startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.additiveChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.ADDITIVE_COLOR_POSITIVE, width=2) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+32, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 32, startPosY)), fill=self.ADDITIVE_COLOR_NEGATIVE, width=2) im_drawer.text( - text='Additive Effect', xy=(startPosX+40, startPosY+TEXT_Y_DISPLACEMENT), + text='Additive Effect', xy=(startPosX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.genotype.type == 'intercross' and self.dominanceChecked: im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.DOMINANCE_COLOR_POSITIVE, width=4) im_drawer.line( - xy=((startPosX+18, startPosY), (startPosX+35, startPosY)), + xy=((startPosX + 18, startPosY), (startPosX + 35, startPosY)), fill=self.DOMINANCE_COLOR_NEGATIVE, width=4) im_drawer.text( - text='Dominance Effect', xy=(startPosX+42, startPosY+5), + text='Dominance Effect', xy=(startPosX + 42, startPosY + 5), font=labelFont, fill=BLACK) startPosY += stepPosY if self.haplotypeAnalystChecked: im_drawer.line( - xy=((startPosX-34, startPosY), (startPosX-17, startPosY)), + xy=((startPosX - 34, startPosY), (startPosX - 17, startPosY)), fill=self.HAPLOTYPE_POSITIVE, width=4) im_drawer.line( - xy=((startPosX-17, startPosY), (startPosX, startPosY)), + xy=((startPosX - 17, startPosY), (startPosX, startPosY)), fill=self.HAPLOTYPE_NEGATIVE, width=4) im_drawer.line( - xy=((startPosX, startPosY), (startPosX+17, startPosY)), + xy=((startPosX, startPosY), (startPosX + 17, startPosY)), fill=self.HAPLOTYPE_HETEROZYGOUS, width=4) im_drawer.line( - xy=((startPosX+17, startPosY), (startPosX+34, startPosY)), + xy=((startPosX + 17, startPosY), (startPosX + 34, startPosY)), fill=self.HAPLOTYPE_RECOMBINATION, width=4) im_drawer.text( text='Haplotypes (Pat, Mat, Het, Unk)', - xy=(startPosX+41, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + xy=(startPosX + 41, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) startPosY += stepPosY if self.permChecked and self.nperm > 0: @@ -1118,26 +1168,29 @@ class DisplayMappingResults(object): if self.multipleInterval and not self.bootChecked: thisStartX = canvas.size[0] - xRightOffset - 205 im_drawer.line( - xy=((thisStartX, startPosY), ( startPosX + 32, startPosY)), + xy=((thisStartX, startPosY), (startPosX + 32, startPosY)), fill=self.SIGNIFICANT_COLOR, width=self.SIGNIFICANT_WIDTH) im_drawer.line( - xy=((thisStartX, startPosY + stepPosY), ( startPosX + 32, startPosY + stepPosY)), + xy=((thisStartX, startPosY + stepPosY), + (startPosX + 32, startPosY + stepPosY)), fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH) im_drawer.text( - text='Significant %s = %2.2f' % (self.LRS_LOD, self.significant), - xy=(thisStartX+40, startPosY+TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) + text='Significant %s = %2.2f' % ( + self.LRS_LOD, self.significant), + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT), font=labelFont, fill=BLACK) im_drawer.text( text='Suggestive %s = %2.2f' % (self.LRS_LOD, self.suggestive), - xy=(thisStartX+40, startPosY + TEXT_Y_DISPLACEMENT +stepPosY), font=labelFont, + xy=(thisStartX + 40, startPosY + TEXT_Y_DISPLACEMENT + stepPosY), font=labelFont, fill=BLACK) - labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12*fontZoom) + labelFont = ImageFont.truetype(font=VERDANA_FILE, size=12 * fontZoom) labelColor = BLACK if self.dataset.type == "Publish" or self.dataset.type == "Geno": dataset_label = self.dataset.fullname else: - dataset_label = "%s - %s" % (self.dataset.group.name, self.dataset.fullname) + dataset_label = "%s - %s" % (self.dataset.group.name, + self.dataset.fullname) string1 = 'Dataset: %s' % (dataset_label) @@ -1154,7 +1207,8 @@ class DisplayMappingResults(object): string3 = 'Using GEMMA mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names else: string3 += 'no cofactors' @@ -1162,7 +1216,8 @@ class DisplayMappingResults(object): string3 = 'Using R/qtl mapping method with ' if self.covariates != "": string3 += 'the cofactors below:' - cofactor_names = ", ".join([covar.split(":")[0] for covar in self.covariates.split(",")]) + cofactor_names = ", ".join( + [covar.split(":")[0] for covar in self.covariates.split(",")]) string4 = cofactor_names elif self.controlLocus and self.doControl != "false": string3 += '%s as control' % self.controlLocus @@ -1180,32 +1235,36 @@ class DisplayMappingResults(object): if self.selectedChr == -1: identification = "Mapping on All Chromosomes for " else: - identification = "Mapping on Chromosome %s for " % (self.ChrList[self.selectedChr][0]) + identification = "Mapping on Chromosome %s for " % ( + self.ChrList[self.selectedChr][0]) if self.this_trait.symbol: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.symbol) elif self.dataset.type == "Publish": if self.this_trait.post_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.post_publication_abbreviation) elif self.this_trait.pre_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + identification += "Trait: %s - %s" % ( + self.this_trait.name, self.this_trait.pre_publication_abbreviation) else: identification += "Trait: %s" % (self.this_trait.name) else: identification += "Trait: %s" % (self.this_trait.name) identification += " with %s samples" % (self.n_samples) - d = 4+ max( + d = 4 + max( im_drawer.textsize(identification, font=labelFont)[0], im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) im_drawer.text( text=identification, - xy=(xLeftOffset, y_constant*fontZoom), font=labelFont, + xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 else: - d = 4+ max( + d = 4 + max( im_drawer.textsize(string1, font=labelFont)[0], im_drawer.textsize(string2, font=labelFont)[0]) @@ -1223,28 +1282,28 @@ class DisplayMappingResults(object): transform_text += "Invert +/-" im_drawer.text( - text=transform_text, xy=(xLeftOffset, y_constant*fontZoom), + text=transform_text, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string1, xy=(xLeftOffset, y_constant*fontZoom), + text=string1, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 im_drawer.text( - text=string2, xy=(xLeftOffset, y_constant*fontZoom), + text=string2, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string3 != '': im_drawer.text( - text=string3, xy=(xLeftOffset, y_constant*fontZoom), + text=string3, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) y_constant += 15 if string4 != '': im_drawer.text( - text=string4, xy=(xLeftOffset, y_constant*fontZoom), + text=string4, xy=(xLeftOffset, y_constant * fontZoom), font=labelFont, fill=labelColor) - def drawGeneBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawGeneBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1264,11 +1323,12 @@ class DisplayMappingResults(object): if self.dataset.group.species == "mouse": txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - geneLength = (txEnd - txStart)*1000.0 - tenPercentLength = geneLength*0.0001 - SNPdensity = theGO["snpCount"]/geneLength + geneLength = (txEnd - txStart) * 1000.0 + tenPercentLength = geneLength * 0.0001 + SNPdensity = theGO["snpCount"] / geneLength - exonStarts = list(map(float, theGO['exonStarts'].split(",")[:-1])) + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] @@ -1277,23 +1337,26 @@ class DisplayMappingResults(object): strand = theGO["Strand"] exonCount = theGO["exonCount"] - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene - #color the gene based on SNP density - #found earlier, needs to be recomputed as snps are added - #always apply colors now, even if SNP Track not checked - Zach 11/24/2010 + # color the gene based on SNP density + # found earlier, needs to be recomputed as snps are added + # always apply colors now, even if SNP Track not checked - Zach 11/24/2010 - densities=[1.0000000000000001e-05, 0.094094033555233408, 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] + densities = [1.0000000000000001e-05, 0.094094033555233408, + 0.3306166377816987, 0.88246026851027781, 2.6690084029581951, 4.1, 61.0] if SNPdensity < densities[0]: myColor = BLACK elif SNPdensity < densities[1]: @@ -1310,11 +1373,12 @@ class DisplayMappingResults(object): myColor = DARKRED outlineColor = myColor - fillColor = myColor + fillColor = myColor - TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % (geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) + TITLE = "Gene: %s (%s)\nFrom %2.3f to %2.3f Mb (%s)\nNum. exons: %d." % ( + geneSymbol, accession, float(txStart), float(txEnd), strand, exonCount) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol elif self.dataset.group.species == "rat": exonStarts = [] @@ -1327,85 +1391,92 @@ class DisplayMappingResults(object): strand = theGO["Strand"] exonCount = 0 - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) #at least one pixel + geneStartPix = xLeftOffset + \ + plotXScale * (float(txStart) - startMb) + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) # at least one pixel if (geneEndPix < xLeftOffset): - return; # this gene is not on the screen + return # this gene is not on the screen elif (geneEndPix > xLeftOffset + plotWidth): - geneEndPix = xLeftOffset + plotWidth; # clip the last in-range gene + geneEndPix = xLeftOffset + plotWidth # clip the last in-range gene if (geneStartPix > xLeftOffset + plotWidth): - return; # we are outside the valid on-screen range, so stop drawing genes + return # we are outside the valid on-screen range, so stop drawing genes elif (geneStartPix < xLeftOffset): - geneStartPix = xLeftOffset; # clip the first in-range gene + geneStartPix = xLeftOffset # clip the first in-range gene outlineColor = DARKBLUE fillColor = DARKBLUE - TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % (geneSymbol, float(txStart), float(txEnd), strand) + TITLE = "Gene: %s\nFrom %2.3f to %2.3f Mb (%s)" % ( + geneSymbol, float(txStart), float(txEnd), strand) # NL: 06-02-2011 Rob required to change this link for gene related - HREF=geneNCBILink %geneSymbol + HREF = geneNCBILink % geneSymbol else: outlineColor = ORANGE fillColor = ORANGE TITLE = "Gene: %s" % geneSymbol - #Draw Genes - geneYLocation = yPaddingTop + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT*zoom + # Draw Genes + geneYLocation = yPaddingTop + \ + (gIndex % self.NUM_GENE_ROWS) * self.EACH_GENE_HEIGHT * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - #draw the detail view + # draw the detail view if self.endMb - self.startMb <= self.DRAW_DETAIL_MB and geneEndPix - geneStartPix > self.EACH_GENE_ARROW_SPACING * 3: utrColor = ImageColor.getrgb("rgb(66%, 66%, 66%)") arrowColor = ImageColor.getrgb("rgb(70%, 70%, 70%)") - #draw the line that runs the entire length of the gene + # draw the line that runs the entire length of the gene im_drawer.line( xy=( - (geneStartPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom), - ( geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT/2*zoom)), + (geneStartPix, geneYLocation + \ + self.EACH_GENE_HEIGHT / 2 * zoom), + (geneEndPix, geneYLocation + self.EACH_GENE_HEIGHT / 2 * zoom)), fill=outlineColor, width=1) - #draw the arrows + # draw the arrows if geneEndPix - geneStartPix < 1: genePixRange = 1 else: genePixRange = int(geneEndPix - geneStartPix) for xCoord in range(0, genePixRange): - if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix-geneStartPix) or xCoord == 0: + if (xCoord % self.EACH_GENE_ARROW_SPACING == 0 and xCoord + self.EACH_GENE_ARROW_SPACING < geneEndPix - geneStartPix) or xCoord == 0: if strand == "+": im_drawer.line( xy=((geneStartPix + xCoord, geneYLocation), (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - (geneStartPix + xCoord+self.EACH_GENE_ARROW_WIDTH, + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) else: im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, geneYLocation), - ( geneStartPix + xCoord, - geneYLocation +(self.EACH_GENE_HEIGHT / 2)*zoom)), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) im_drawer.line( xy=((geneStartPix + xCoord + self.EACH_GENE_ARROW_WIDTH, - geneYLocation + self.EACH_GENE_HEIGHT*zoom), - ( geneStartPix + xCoord, - geneYLocation + (self.EACH_GENE_HEIGHT / 2)*zoom)), + geneYLocation + self.EACH_GENE_HEIGHT * zoom), + (geneStartPix + xCoord, + geneYLocation + (self.EACH_GENE_HEIGHT / 2) * zoom)), fill=arrowColor, width=1) - #draw the blocks for the exon regions + # draw the blocks for the exon regions for i in range(0, len(exonStarts)): - exonStartPix = (exonStarts[i]-startMb)*plotXScale + xLeftOffset - exonEndPix = (exonEnds[i]-startMb)*plotXScale + xLeftOffset + exonStartPix = ( + exonStarts[i] - startMb) * plotXScale + xLeftOffset + exonEndPix = (exonEnds[i] - startMb) * \ + plotXScale + xLeftOffset if (exonStartPix < xLeftOffset): exonStartPix = xLeftOffset if (exonEndPix < xLeftOffset): @@ -1416,13 +1487,14 @@ class DisplayMappingResults(object): exonStartPix = xLeftOffset + plotWidth im_drawer.rectangle( xy=((exonStartPix, geneYLocation), - (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline = outlineColor, fill = fillColor) + (exonEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - #draw gray blocks for 3' and 5' UTR blocks + # draw gray blocks for 3' and 5' UTR blocks if cdsStart and cdsEnd: - utrStartPix = (txStart-startMb)*plotXScale + xLeftOffset - utrEndPix = (cdsStart-startMb)*plotXScale + xLeftOffset + utrStartPix = (txStart - startMb) * \ + plotXScale + xLeftOffset + utrEndPix = (cdsStart - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1439,13 +1511,14 @@ class DisplayMappingResults(object): labelText = "5'" im_drawer.text( text=labelText, - xy=(utrStartPix-9, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrStartPix - 9, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #the second UTR region + # the second UTR region - utrStartPix = (cdsEnd-startMb)*plotXScale + xLeftOffset - utrEndPix = (txEnd-startMb)*plotXScale + xLeftOffset + utrStartPix = (cdsEnd - startMb) * plotXScale + xLeftOffset + utrEndPix = (txEnd - startMb) * plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): utrStartPix = xLeftOffset if (utrEndPix < xLeftOffset): @@ -1462,17 +1535,19 @@ class DisplayMappingResults(object): labelText = "3'" im_drawer.text( text=labelText, - xy=(utrEndPix+2, geneYLocation+self.EACH_GENE_HEIGHT), + xy=(utrEndPix + 2, geneYLocation + \ + self.EACH_GENE_HEIGHT), font=ImageFont.truetype(font=ARIAL_FILE, size=2)) - #draw the genes as rectangles + # draw the genes as rectangles else: im_drawer.rectangle( xy=((geneStartPix, geneYLocation), - (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT*zoom))), - outline= outlineColor, fill = fillColor) + (geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT * zoom))), + outline=outlineColor, fill=fillColor) - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation, geneEndPix, (geneYLocation + self.EACH_GENE_HEIGHT)) # NL: 06-02-2011 Rob required to display NCBI info in a new window gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1482,8 +1557,8 @@ class DisplayMappingResults(object): title=TITLE, target="_blank")) -## BEGIN HaplotypeAnalyst - def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): +# BEGIN HaplotypeAnalyst + def drawHaplotypeBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): if self.plotScale != 'physic' or self.selectedChr == -1 or not self.geneCol: return @@ -1496,61 +1571,66 @@ class DisplayMappingResults(object): samplelist = list(self.genotype.prgy) - smd=[] + smd = [] for sample in self.sample_vals_dict.keys(): if self.sample_vals_dict[sample] != "x" and sample in samplelist: - temp = GeneralObject(name=sample, value=float(self.sample_vals_dict[sample])) + temp = GeneralObject(name=sample, value=float( + self.sample_vals_dict[sample])) smd.append(temp) else: continue - smd.sort(key = lambda A: A.value) + smd.sort(key=lambda A: A.value) smd.reverse() oldgeneEndPix = -1 - #Initializing plotRight, error before + # Initializing plotRight, error before plotRight = xRightOffset im_drawer = ImageDraw.Draw(canvas) -#### find out PlotRight +# find out PlotRight for _chr in self.genotype: if _chr.name == self.ChrList[self.selectedChr][0]: for i, _locus in enumerate(_chr): txStart = _chr[i].Mb - txEnd = _chr[i].Mb + txEnd = _chr[i].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) - 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) - 0 drawit = 1 if (geneStartPix < xLeftOffset): - drawit = 0; + drawit = 0 if (geneStartPix > xLeftOffset + plotWidth): - drawit = 0; + drawit = 0 if drawit == 1: - if _chr[i].name != " - " : + if _chr[i].name != " - ": plotRight = geneEndPix + 4 -#### end find out PlotRight +# end find out PlotRight firstGene = 1 lastGene = 0 - #Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" - #was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. - #Now there should always be some value set for "oldgeno" - Zach 12/14/2010 - oldgeno = [None]*len(self.strainlist) + # Sets the length to the length of the strain list. Beforehand, "oldgeno = self.genotype[0][i].genotype" + # was the only place it was initialized, which worked as long as the very start (startMb = None/0) wasn't being mapped. + # Now there should always be some value set for "oldgeno" - Zach 12/14/2010 + oldgeno = [None] * len(self.strainlist) for i, _chr in enumerate(self.genotype): if _chr.name == self.ChrList[self.selectedChr][0]: for j, _locus in enumerate(_chr): txStart = _chr[j].Mb - txEnd = _chr[j].Mb + txEnd = _chr[j].Mb - geneStartPix = xLeftOffset + plotXScale*(float(txStart) - startMb) - 0 - geneEndPix = xLeftOffset + plotXScale*(float(txEnd) - startMb) + 0 + geneStartPix = xLeftOffset + plotXScale * \ + (float(txStart) - startMb) - 0 + geneEndPix = xLeftOffset + plotXScale * \ + (float(txEnd) - startMb) + 0 if oldgeneEndPix >= xLeftOffset: drawStart = oldgeneEndPix + 4 @@ -1582,36 +1662,38 @@ class DisplayMappingResults(object): if drawit == 1: myColor = DARKBLUE outlineColor = myColor - fillColor = myColor + fillColor = myColor - maxind=0 + maxind = 0 - #Draw Genes + # Draw Genes - geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * (self.EACH_GENE_HEIGHT)*zoom + geneYLocation = yPaddingTop + self.NUM_GENE_ROWS * \ + (self.EACH_GENE_HEIGHT) * zoom if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - geneYLocation += 4*self.BAND_HEIGHT + 4*self.BAND_SPACING + geneYLocation += 4 * self.BAND_HEIGHT + 4 * self.BAND_SPACING else: - geneYLocation += 3*self.BAND_HEIGHT + 3*self.BAND_SPACING + geneYLocation += 3 * self.BAND_HEIGHT + 3 * self.BAND_SPACING - if _chr[j].name != " - " : + if _chr[j].name != " - ": if (firstGene == 1) and (lastGene != 1): oldgeneEndPix = drawStart = xLeftOffset oldgeno = _chr[j].genotype continue - for k, _geno in enumerate (_chr[j].genotype): - plotbxd=0 + for k, _geno in enumerate(_chr[j].genotype): + plotbxd = 0 if samplelist[k] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): ind = 0 if samplelist[k] in [item.name for item in smd]: - ind = [item.name for item in smd].index(samplelist[k]) + ind = [item.name for item in smd].index( + samplelist[k]) - maxind=max(ind, maxind) + maxind = max(ind, maxind) # lines if (oldgeno[k] == -1 and _geno == -1): @@ -1621,28 +1703,29 @@ class DisplayMappingResults(object): elif (oldgeno[k] == 0 and _geno == 0): mylineColor = self.HAPLOTYPE_HETEROZYGOUS else: - mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown + mylineColor = self.HAPLOTYPE_RECOMBINATION # XZ: Unknown im_drawer.line( xy=((drawStart, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) - fillColor=BLACK - outlineColor=BLACK + fillColor = BLACK + outlineColor = BLACK if lastGene == 0: im_drawer.rectangle( xy=((geneStartPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (geneEndPix, - geneYLocation+2*ind*self.EACH_GENE_HEIGHT+ 2*self.EACH_GENE_HEIGHT*zoom)), + geneYLocation + 2 * ind * self.EACH_GENE_HEIGHT + 2 * self.EACH_GENE_HEIGHT * zoom)), outline=outlineColor, fill=fillColor) - - COORDS = "%d, %d, %d, %d" %(geneStartPix, geneYLocation+ind*self.EACH_GENE_HEIGHT, geneEndPix+1, (geneYLocation + ind*self.EACH_GENE_HEIGHT)) - TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % (samplelist[k], _chr[j].name, float(txStart)) + COORDS = "%d, %d, %d, %d" % ( + geneStartPix, geneYLocation + ind * self.EACH_GENE_HEIGHT, geneEndPix + 1, (geneYLocation + ind * self.EACH_GENE_HEIGHT)) + TITLE = "Strain: %s, marker (%s) \n Position %2.3f Mb." % ( + samplelist[k], _chr[j].name, float(txStart)) HREF = '' gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1658,11 +1741,10 @@ class DisplayMappingResults(object): mylineColor = self.HAPLOTYPE_RECOMBINATION im_drawer.line( xy=((plotRight, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), (drawEnd, - geneYLocation+7+2*ind*self.EACH_GENE_HEIGHT*zoom)), - fill= mylineColor, width=zoom*(self.EACH_GENE_HEIGHT+2)) - + geneYLocation + 7 + 2 * ind * self.EACH_GENE_HEIGHT * zoom)), + fill=mylineColor, width=zoom * (self.EACH_GENE_HEIGHT + 2)) if lastGene == 0: draw_rotated_text( @@ -1670,10 +1752,10 @@ class DisplayMappingResults(object): font=ImageFont.truetype(font=VERDANA_FILE, size=12), xy=(geneStartPix, - geneYLocation+17+2*maxind*self.EACH_GENE_HEIGHT*zoom), + geneYLocation + 17 + 2 * maxind * self.EACH_GENE_HEIGHT * zoom), fill=BLACK, angle=-90) - oldgeneEndPix = geneEndPix; + oldgeneEndPix = geneEndPix oldgeno = _chr[j].genotype firstGene = 0 else: @@ -1683,31 +1765,34 @@ class DisplayMappingResults(object): if _chr.name == self.ChrList[self.selectedChr][0]: for j, _geno in enumerate(_chr[1].genotype): - plotbxd=0 + plotbxd = 0 if samplelist[j] in [item.name for item in smd]: - plotbxd=1 + plotbxd = 1 if (plotbxd == 1): - ind = [item.name for item in smd].index(samplelist[j]) - 1 - expr = smd[ind].value + ind = [item.name for item in smd].index( + samplelist[j]) - 1 + expr = smd[ind+1].value # Place where font is hardcoded im_drawer.text( text="%s" % (samplelist[j]), xy=((xLeftOffset + plotWidth + 10), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) im_drawer.text( text="%2.2f" % (expr), xy=((xLeftOffset + plotWidth + 60), - geneYLocation+11+2*ind*self.EACH_GENE_HEIGHT*zoom), - font=ImageFont.truetype(font=VERDANA_FILE, size=12), + geneYLocation + 11 + 2 * ind * self.EACH_GENE_HEIGHT * zoom), + font=ImageFont.truetype( + font=VERDANA_FILE, size=12), fill=BLACK) -## END HaplotypeAnalyst +# END HaplotypeAnalyst - def drawClickBand(self, canvas, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawClickBand(self, canvas, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) if self.plotScale != 'physic' or self.selectedChr == -1: return @@ -1724,12 +1809,16 @@ class DisplayMappingResults(object): # but it makes the HTML huge, and takes forever to render the page in the first place) # Draw the bands that you can click on to go to UCSC / Ensembl MAX_CLICKABLE_REGION_DIVISIONS = 100 - clickableRegionLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=9) - pixelStep = max(5, int(float(plotWidth)/MAX_CLICKABLE_REGION_DIVISIONS)) + clickableRegionLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=9) + pixelStep = max( + 5, int(float(plotWidth) / MAX_CLICKABLE_REGION_DIVISIONS)) # pixelStep: every N pixels, we make a new clickable area for the user to go to that area of the genome. - numBasesCurrentlyOnScreen = self.kONE_MILLION*abs(startMb - endMb) # Number of bases on screen now - flankingWidthInBases = int ( min( (float(numBasesCurrentlyOnScreen) / 2.0), (5*self.kONE_MILLION) ) ) + numBasesCurrentlyOnScreen = self.kONE_MILLION * \ + abs(startMb - endMb) # Number of bases on screen now + flankingWidthInBases = int( + min((float(numBasesCurrentlyOnScreen) / 2.0), (5 * self.kONE_MILLION))) webqtlZoomWidth = numBasesCurrentlyOnScreen / 16.0 # Flanking width should be such that we either zoom in to a 10 million base region, or we show the clicked region at the same scale as we are currently seeing. @@ -1738,23 +1827,33 @@ class DisplayMappingResults(object): paddingTop = yTopOffset if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - phenogenPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ucscPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 3*(self.BAND_HEIGHT + self.BAND_SPACING) + phenogenPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 3 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) else: - ucscPaddingTop = paddingTop + (self.BAND_HEIGHT + self.BAND_SPACING) - ensemblPaddingTop = paddingTop + 2*(self.BAND_HEIGHT + self.BAND_SPACING) + ucscPaddingTop = paddingTop + \ + (self.BAND_HEIGHT + self.BAND_SPACING) + ensemblPaddingTop = paddingTop + 2 * \ + (self.BAND_HEIGHT + self.BAND_SPACING) if zoom == 1: for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): - calBase = self.kONE_MILLION*(startMb + (endMb-startMb)*(pixel-xLeftOffset-0.0)/plotWidth) + calBase = self.kONE_MILLION * \ + (startMb + (endMb - startMb) * \ + (pixel - xLeftOffset - 0.0) / plotWidth) xBrowse1 = pixel - xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) + xBrowse2 = min(xLeftOffset + plotWidth, + (pixel + pixelStep - 1)) - WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.BAND_HEIGHT)) - WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0) + WEBQTL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, paddingTop, xBrowse2, (paddingTop + self.BAND_HEIGHT)) + WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max( + 0, (calBase - webqtlZoomWidth)) / 1000000.0, (calBase + webqtlZoomWidth) / 1000000.0) WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" gifmap.append( @@ -1769,15 +1868,19 @@ class DisplayMappingResults(object): outline=self.CLICKABLE_WEBQTL_REGION_COLOR, fill=self.CLICKABLE_WEBQTL_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, paddingTop), ( xBrowse1, (paddingTop + self.BAND_HEIGHT))), + xy=((xBrowse1, paddingTop), (xBrowse1, + (paddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": - PHENOGEN_COORDS = "%d, %d, %d, %d" % (xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT)) + PHENOGEN_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, phenogenPaddingTop, xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + PHENOGEN_HREF = "https://phenogen.org/gene.jsp?speciesCB=Mm&auto=Y&geneTxt=chr%s:%d-%d&genomeVer=mm10" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) PHENOGEN_TITLE = "Click to view this section of the genome in PhenoGen" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1787,18 +1890,22 @@ class DisplayMappingResults(object): title=PHENOGEN_TITLE)) im_drawer.rectangle( xy=((xBrowse1, phenogenPaddingTop), - (xBrowse2, (phenogenPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (phenogenPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_PHENOGEN_REGION_COLOR, fill=self.CLICKABLE_PHENOGEN_REGION_COLOR) im_drawer.line( - xy=((xBrowse1, phenogenPaddingTop), ( xBrowse1, (phenogenPaddingTop+self.BAND_HEIGHT))), + xy=((xBrowse1, phenogenPaddingTop), (xBrowse1, + (phenogenPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_PHENOGEN_REGION_OUTLINE_COLOR) - UCSC_COORDS = "%d, %d, %d, %d" %(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT)) + UCSC_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) else: - UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d" % ( + self._ucscDb, self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) UCSC_TITLE = "Click to view this section of the genome in the UCSC Genome Browser" gifmap.append( HtmlGenWrapper.create_area_tag( @@ -1808,19 +1915,22 @@ class DisplayMappingResults(object): title=UCSC_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ucscPaddingTop), - (xBrowse2, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ucscPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_UCSC_REGION_COLOR, fill=self.CLICKABLE_UCSC_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ucscPaddingTop), - (xBrowse1, (ucscPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ucscPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) - ENSEMBL_COORDS = "%d, %d, %d, %d" %(xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT)) + ENSEMBL_COORDS = "%d, %d, %d, %d" % ( + xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT)) if self.dataset.group.species == "mouse": - ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) else: - ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) + ENSEMBL_HREF = "http://www.ensembl.org/Rattus_norvegicus/contigview?chr=%s&start=%d&end=%d" % ( + self.selectedChr, max(0, calBase - flankingWidthInBases), calBase + flankingWidthInBases) ENSEMBL_TITLE = "Click to view this section of the genome in the Ensembl Genome Browser" gifmap.append(HtmlGenWrapper.create_area_tag( shape='rect', @@ -1829,36 +1939,40 @@ class DisplayMappingResults(object): title=ENSEMBL_TITLE)) im_drawer.rectangle( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse2, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse2, (ensemblPaddingTop + self.BAND_HEIGHT))), outline=self.CLICKABLE_ENSEMBL_REGION_COLOR, fill=self.CLICKABLE_ENSEMBL_REGION_COLOR) im_drawer.line( xy=((xBrowse1, ensemblPaddingTop), - (xBrowse1, (ensemblPaddingTop+self.BAND_HEIGHT))), + (xBrowse1, (ensemblPaddingTop + self.BAND_HEIGHT))), fill=self.CLICKABLE_ENSEMBL_REGION_OUTLINE_COLOR) # end for im_drawer.text( text="Click to view the corresponding section of the genome in an 8x expanded WebQTL map", - xy=((xLeftOffset + 10), paddingTop),# + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), paddingTop), # + self.BAND_HEIGHT/2), font=clickableRegionLabelFont, fill=self.CLICKABLE_WEBQTL_TEXT_COLOR) if self.dataset.group.species == "mouse" or self.dataset.group.species == "rat": im_drawer.text( text="Click to view the corresponding section of the genome in PhenoGen", - xy=((xLeftOffset + 10), phenogenPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), phenogenPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_PHENOGEN_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the UCSC Genome Browser", - xy=((xLeftOffset + 10), ucscPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ucscPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_UCSC_TEXT_COLOR) im_drawer.text( text="Click to view the corresponding section of the genome in the Ensembl Genome Browser", - xy=((xLeftOffset+10), ensemblPaddingTop),# + self.BAND_HEIGHT/2), + # + self.BAND_HEIGHT/2), + xy=((xLeftOffset + 10), ensemblPaddingTop), font=clickableRegionLabelFont, fill=self.CLICKABLE_ENSEMBL_TEXT_COLOR) - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_BOLD_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype( + font=VERDANA_BOLD_FILE, size=26 * zoom) chrX = xLeftOffset + plotWidth - 2 - im_drawer.textsize( "Chr %s" % self.ChrList[self.selectedChr][0], font=chrFont)[0] im_drawer.text( @@ -1866,17 +1980,17 @@ class DisplayMappingResults(object): xy=(chrX, phenogenPaddingTop), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions else: - #draw the gray text - chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26*zoom) + # draw the gray text + chrFont = ImageFont.truetype(font=VERDANA_FILE, size=26 * zoom) chrX = xLeftOffset + (plotWidth - im_drawer.textsize( - "Chr %s" % currentChromosome, font=chrFont)[0])/2 + "Chr %s" % currentChromosome, font=chrFont)[0]) / 2 im_drawer.text( text="Chr %s" % currentChromosome, xy=(chrX, 32), font=chrFont, fill=GRAY) # end of drawBrowserClickableRegions pass - def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawXAxis(self, canvas, drawAreaHeight, gifmap, plotXScale, showLocusForm, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -1886,33 +2000,33 @@ class DisplayMappingResults(object): if zoom == 2: fontZoom = 1.5 - #Parameters - NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks + # Parameters + NUM_MINOR_TICKS = 5 # Number of minor ticks between major ticks X_MAJOR_TICK_THICKNESS = 3 X_MINOR_TICK_THICKNESS = 1 - X_AXIS_THICKNESS = 1*zoom + X_AXIS_THICKNESS = 1 * zoom # ======= Alex: Draw the X-axis labels (megabase location) - MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15*zoom) - xMajorTickHeight = 10 * zoom # How high the tick extends below the axis - xMinorTickHeight = 5*zoom + MBLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=15 * zoom) + xMajorTickHeight = 10 * zoom # How high the tick extends below the axis + xMinorTickHeight = 5 * zoom xAxisTickMarkColor = BLACK xAxisLabelColor = BLACK - fontHeight = 12*fontZoom # How tall the font that we're using is + fontHeight = 12 * fontZoom # How tall the font that we're using is spacingFromLabelToAxis = 10 if self.plotScale == 'physic': - strYLoc = yZero + MBLabelFont.font.height/2 - ###Physical single chromosome view + strYLoc = yZero + MBLabelFont.font.height / 2 + # Physical single chromosome view if self.selectedChr > -1: XScale = Plot.detScale(startMb, endMb) XStart, XEnd, XStep = XScale if XStep < 8: XStep *= 2 - spacingAmtX = spacingAmt = (XEnd-XStart)/XStep + spacingAmtX = spacingAmt = (XEnd - XStart) / XStep j = 0 - while abs(spacingAmtX -int(spacingAmtX)) >= spacingAmtX/100.0 and j < 6: + while abs(spacingAmtX - int(spacingAmtX)) >= spacingAmtX / 100.0 and j < 6: j += 1 spacingAmtX *= 10 @@ -1921,30 +2035,32 @@ class DisplayMappingResults(object): for counter, _Mb in enumerate(Plot.frange(XStart, XEnd, spacingAmt / NUM_MINOR_TICKS)): if _Mb < startMb or _Mb > endMb: continue - Xc = xLeftOffset + plotXScale*(_Mb - startMb) - if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark + Xc = xLeftOffset + plotXScale * (_Mb - startMb) + if counter % NUM_MINOR_TICKS == 0: # Draw a MAJOR mark, not just a minor tick mark im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMajorTickHeight)), + (Xc, yZero + xMajorTickHeight)), fill=xAxisTickMarkColor, - width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark - labelStr = str(formatStr % _Mb) # What Mbase location to put on the label - strWidth, strHeight = im_drawer.textsize(labelStr, font=MBLabelFont) + width=X_MAJOR_TICK_THICKNESS) # Draw the MAJOR tick mark + # What Mbase location to put on the label + labelStr = str(formatStr % _Mb) + strWidth, strHeight = im_drawer.textsize( + labelStr, font=MBLabelFont) drawStringXc = (Xc - (strWidth / 2.0)) im_drawer.text(xy=(drawStringXc, strYLoc), text=labelStr, font=MBLabelFont, fill=xAxisLabelColor) else: im_drawer.line(xy=((Xc, yZero), - (Xc, yZero+xMinorTickHeight)), + (Xc, yZero + xMinorTickHeight)), fill=xAxisTickMarkColor, - width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark + width=X_MINOR_TICK_THICKNESS) # Draw the MINOR tick mark - ###Physical genome wide view + # Physical genome wide view else: distScale = 0 startPosX = xLeftOffset for i, distLen in enumerate(self.ChrLengthDistList): - if distScale == 0: #universal scale in whole genome mapping + if distScale == 0: # universal scale in whole genome mapping if distLen > 75: distScale = 25 elif distLen > 30: @@ -1953,51 +2069,55 @@ class DisplayMappingResults(object): distScale = 5 for j, tickdists in enumerate(range(distScale, int(ceil(distLen)), distScale)): im_drawer.line( - xy=((startPosX+tickdists*plotXScale, yZero), - (startPosX+tickdists*plotXScale, yZero + 7)), - fill=BLACK, width=1*zoom) + xy=((startPosX + tickdists * plotXScale, yZero), + (startPosX + tickdists * plotXScale, yZero + 7)), + fill=BLACK, width=1 * zoom) if j % 2 == 0: draw_rotated_text( canvas, text=str(tickdists), font=MBLabelFont, - xy=(startPosX+tickdists*plotXScale, - yZero+10*zoom), fill=BLACK, angle=270) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + xy=(startPosX + tickdists * plotXScale, + yZero + 10 * zoom), fill=BLACK, angle=270) + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale - megabaseLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + megabaseLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Megabases", xy=( - xLeftOffset+(plotWidth-im_drawer.textsize( - "Megabases", font=megabaseLabelFont)[0])/2, - strYLoc+MBLabelFont.font.height+10*(zoom%2)), + xLeftOffset + (plotWidth - im_drawer.textsize( + "Megabases", font=megabaseLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=megabaseLabelFont, fill=BLACK) pass else: - strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height/2 + strYLoc = yZero + spacingFromLabelToAxis + MBLabelFont.font.height / 2 ChrAInfo = [] preLpos = -1 distinctCount = 0.0 - if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes + if self.selectedChr == -1: # ZS: If viewing full genome/all chromosomes for i, _chr in enumerate(self.genotype): thisChr = [] Locus0CM = _chr[0].cM nLoci = len(_chr) - if nLoci <= 8: + if nLoci <= 8: for _locus in _chr: if _locus.name != ' - ': if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) else: - for j in (0, nLoci/4, nLoci/2, nLoci*3/4, -1): + for j in (0, nLoci / 4, nLoci / 2, nLoci * 3 / 4, -1): while _chr[j].name == ' - ': j += 1 if _chr[j].cM != preLpos: distinctCount += 1 preLpos = _chr[j].cM - thisChr.append([_chr[j].name, _chr[j].cM-Locus0CM]) + thisChr.append( + [_chr[j].name, _chr[j].cM - Locus0CM]) ChrAInfo.append(thisChr) else: for i, _chr in enumerate(self.genotype): @@ -2009,10 +2129,11 @@ class DisplayMappingResults(object): if _locus.cM != preLpos: distinctCount += 1 preLpos = _locus.cM - thisChr.append([_locus.name, _locus.cM-Locus0CM]) + thisChr.append( + [_locus.name, _locus.cM - Locus0CM]) ChrAInfo.append(thisChr) - stepA = (plotWidth+0.0)/distinctCount + stepA = (plotWidth + 0.0) / distinctCount LRectWidth = 10 LRectHeight = 3 @@ -2037,28 +2158,29 @@ class DisplayMappingResults(object): Zorder = 0 if differ: im_drawer.line( - xy=((startPosX+Lpos, yZero), (xLeftOffset+offsetA,\ - yZero+25)), + xy=((startPosX + Lpos, yZero), (xLeftOffset + offsetA,\ + yZero + 25)), fill=lineColor) im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+25), (xLeftOffset+offsetA,\ - yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 25), (xLeftOffset + offsetA,\ + yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = ORANGE else: im_drawer.line( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)-3), (\ - xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3))), + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) - 3), (\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3))), fill=lineColor) rectColor = DEEPPINK im_drawer.rectangle( - xy=((xLeftOffset+offsetA, yZero+40+Zorder*(LRectWidth+3)), - (xLeftOffset+offsetA-LRectHeight, - yZero+40+Zorder*(LRectWidth+3)+LRectWidth)), - outline=rectColor, fill=rectColor, width = 0) - COORDS="%d,%d,%d,%d"%(xLeftOffset+offsetA-LRectHeight, yZero+40+Zorder*(LRectWidth+3),\ - xLeftOffset+offsetA,yZero+40+Zorder*(LRectWidth+3)+LRectWidth) - HREF = "/show_trait?trait_id=%s&dataset=%s" % (Lname, self.dataset.group.name+"Geno") + xy=((xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3)), + (xLeftOffset + offsetA - LRectHeight, + yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth)), + outline=rectColor, fill=rectColor, width=0) + COORDS = "%d,%d,%d,%d" % (xLeftOffset + offsetA - LRectHeight, yZero + 40 + Zorder * (LRectWidth + 3),\ + xLeftOffset + offsetA, yZero + 40 + Zorder * (LRectWidth + 3) + LRectWidth) + HREF = "/show_trait?trait_id=%s&dataset=%s" % ( + Lname, self.dataset.group.name + "Geno") #HREF="javascript:showDatabase3('%s','%s','%s','');" % (showLocusForm,fd.RISet+"Geno", Lname) Areas = HtmlGenWrapper.create_area_tag( shape='rect', @@ -2067,26 +2189,27 @@ class DisplayMappingResults(object): target="_blank", title="Locus : {}".format(Lname)) gifmap.append(Areas) - ##piddle bug + # piddle bug if j == 0: im_drawer.line( - xy=((startPosX, yZero), (startPosX, yZero+40)), + xy=((startPosX, yZero), (startPosX, yZero + 40)), fill=lineColor) - startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[j] + \ + self.GraphInterval) * plotXScale - centimorganLabelFont = ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + centimorganLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) im_drawer.text( text="Centimorgans", - xy=(xLeftOffset+(plotWidth-im_drawer.textsize( - "Centimorgans", font=centimorganLabelFont)[0])/2, - strYLoc + MBLabelFont.font.height+ 10*(zoom%2)), + xy=(xLeftOffset + (plotWidth - im_drawer.textsize( + "Centimorgans", font=centimorganLabelFont)[0]) / 2, + strYLoc + MBLabelFont.font.height + 10 * (zoom % 2)), font=centimorganLabelFont, fill=BLACK) - im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset+plotWidth, yZero)), - fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - + im_drawer.line(xy=((xLeftOffset, yZero), (xLeftOffset + plotWidth, yZero)), + fill=BLACK, width=X_AXIS_THICKNESS) # Draw the X axis itself - def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset= (40, 120, 80, 10), zoom = 1, startMb = None, endMb = None): + def drawQTL(self, canvas, drawAreaHeight, gifmap, plotXScale, offset=(40, 120, 80, 10), zoom=1, startMb=None, endMb=None): im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset @@ -2095,74 +2218,85 @@ class DisplayMappingResults(object): if zoom == 2: fontZoom = 1.5 - INTERCROSS = (self.genotype.type=="intercross") + INTERCROSS = (self.genotype.type == "intercross") - #draw the LRS scale - #We first determine whether or not we are using a sliding scale. - #If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. - #LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. - #if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. + # draw the LRS scale + # We first determine whether or not we are using a sliding scale. + # If so, we need to compute the maximum LRS value to determine where the max y-value should be, and call this LRS_LOD_Max. + # LRSTop is then defined to be above the LRS_LOD_Max by enough to add one additional LRSScale increment. + # if we are using a set-scale, then we set LRSTop to be the user's value, and LRS_LOD_Max doesn't matter. - #ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD - if self.lrsMax <= 0: #sliding scale + # ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD + if self.lrsMax <= 0: # sliding scale if "lrs_value" in self.qtlresults[0]: - LRS_LOD_Max = max([result['lrs_value'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lrs_value'] + for result in self.qtlresults]) if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": LRS_LOD_Max = LRS_LOD_Max / self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant / self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive / self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass else: - LRS_LOD_Max = max([result['lod_score'] for result in self.qtlresults]) + LRS_LOD_Max = max([result['lod_score'] + for result in self.qtlresults]) if self.LRS_LOD == "LRS": LRS_LOD_Max = LRS_LOD_Max * self.LODFACTOR if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) + self.significant = min( + self.significant * self.LODFACTOR, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive * self.LODFACTOR, webqtlConfig.MAXLRS) else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: - self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.significant = min( + self.significant, webqtlConfig.MAXLRS) + self.suggestive = min( + self.suggestive, webqtlConfig.MAXLRS) else: pass if self.permChecked and self.nperm > 0 and not self.multipleInterval: LRS_LOD_Max = max(self.significant, LRS_LOD_Max) - #genotype trait will give infinite LRS + # genotype trait will give infinite LRS LRS_LOD_Max = min(LRS_LOD_Max, webqtlConfig.MAXLRS) else: LRS_LOD_Max = self.lrsMax - #ZS: Needed to pass to genome browser + # ZS: Needed to pass to genome browser js_data = json.loads(self.js_data) if self.LRS_LOD == "LRS": - js_data['max_score'] = LRS_LOD_Max/4.61 + js_data['max_score'] = LRS_LOD_Max / 4.61 else: js_data['max_score'] = LRS_LOD_Max self.js_data = json.dumps(js_data) - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # LRSHeightThresh = drawAreaHeight # AdditiveHeightThresh = drawAreaHeight/2 # DominanceHeightThresh = drawAreaHeight/2 if self.selectedChr == 1: - LRSHeightThresh = drawAreaHeight - yTopOffset + 30*(zoom - 1) - AdditiveHeightThresh = LRSHeightThresh/2 - DominanceHeightThresh = LRSHeightThresh/2 + LRSHeightThresh = drawAreaHeight - yTopOffset + 30 * (zoom - 1) + AdditiveHeightThresh = LRSHeightThresh / 2 + DominanceHeightThresh = LRSHeightThresh / 2 else: LRSHeightThresh = drawAreaHeight - AdditiveHeightThresh = drawAreaHeight/2 - DominanceHeightThresh = drawAreaHeight/2 + AdditiveHeightThresh = drawAreaHeight / 2 + DominanceHeightThresh = drawAreaHeight / 2 # LRSHeightThresh = (yZero - yTopOffset + 30*(zoom - 1)) # AdditiveHeightThresh = LRSHeightThresh/2 # DominanceHeightThresh = LRSHeightThresh/2 @@ -2178,7 +2312,7 @@ class DisplayMappingResults(object): LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale) - #ZS: Convert to int if all axis values are whole numbers + # ZS: Convert to int if all axis values are whole numbers all_int = True for item in LRSAxisList: if isinstance(item, int): @@ -2192,9 +2326,10 @@ class DisplayMappingResults(object): # else: # max_lrs_width = canvas.stringWidth("%2.1f" % LRS_LOD_Max, font=LRSScaleFont) + 30 - #draw the "LRS" or "LOD" string to the left of the axis - LRSScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) - LRSLODFont=ImageFont.truetype(font=VERDANA_FILE, size=int(18*zoom*1.5)) + # draw the "LRS" or "LOD" string to the left of the axis + LRSScaleFont = ImageFont.truetype(font=VERDANA_FILE, size=16 * zoom) + LRSLODFont = ImageFont.truetype( + font=VERDANA_FILE, size=int(18 * zoom * 1.5)) yZero = yTopOffset + plotHeight # TEXT_X_DISPLACEMENT = -20 @@ -2210,64 +2345,69 @@ class DisplayMappingResults(object): draw_rotated_text( canvas, text=self.LRS_LOD, font=LRSLODFont, xy=(xLeftOffset - im_drawer.textsize( - "999.99", font=LRSScaleFont)[0] - 15*(zoom-1) + TEXT_X_DISPLACEMENT, - yZero + TEXT_Y_DISPLACEMENT - 300*(zoom - 1)), + "999.99", font=LRSScaleFont)[0] - 15 * (zoom - 1) + TEXT_X_DISPLACEMENT, + yZero + TEXT_Y_DISPLACEMENT - 300 * (zoom - 1)), fill=BLACK, angle=90) for item in LRSAxisList: if LRS_LOD_Max == 0.0: LRS_LOD_Max = 0.000001 - yTopOffset + 30*(zoom - 1) - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset-4, yLRS)), - fill=self.LRS_COLOR, width=1*zoom) + yTopOffset + 30 * (zoom - 1) + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh + im_drawer.line(xy=((xLeftOffset, yLRS), (xLeftOffset - 4, yLRS)), + fill=self.LRS_COLOR, width=1 * zoom) if all_int: scaleStr = "%d" % item else: scaleStr = "%2.1f" % item - #Draw the LRS/LOD Y axis label + # Draw the LRS/LOD Y axis label TEXT_Y_DISPLACEMENT = -10 im_drawer.text( text=scaleStr, - xy=(xLeftOffset-4-im_drawer.textsize(scaleStr, font=LRSScaleFont)[0]-5, - yLRS+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset - 4 - im_drawer.textsize(scaleStr, font=LRSScaleFont)[0] - 5, + yLRS + TEXT_Y_DISPLACEMENT), font=LRSScaleFont, fill=self.LRS_COLOR) if self.permChecked and self.nperm > 0 and not self.multipleInterval: - significantY = yZero - self.significant*LRSHeightThresh/LRS_LOD_Max - suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRS_LOD_Max + significantY = yZero - self.significant * LRSHeightThresh / LRS_LOD_Max + suggestiveY = yZero - self.suggestive * LRSHeightThresh / LRS_LOD_Max # significantY = yZero - self.significant*LRSHeightThresh/LRSAxisList[-1] # suggestiveY = yZero - self.suggestive*LRSHeightThresh/LRSAxisList[-1] startPosX = xLeftOffset - #"Significant" and "Suggestive" Drawing Routine + # "Significant" and "Suggestive" Drawing Routine # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) - #ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function + # ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): - rightEdge = int(start_pos_x + chr_length_dist*plotXScale - self.SUGGESTIVE_WIDTH/1.5) + rightEdge = int(start_pos_x + chr_length_dist * \ + plotXScale - self.SUGGESTIVE_WIDTH / 1.5) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, suggestiveY), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, suggestiveY), (rightEdge, suggestiveY)), - fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH*zoom - #,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + fill=self.SUGGESTIVE_COLOR, width=self.SUGGESTIVE_WIDTH * zoom + # ,clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) im_drawer.line( - xy=((start_pos_x+self.SUGGESTIVE_WIDTH/1.5, significantY), - (rightEdge, significantY)), + xy=((start_pos_x + self.SUGGESTIVE_WIDTH / 1.5, significantY), + (rightEdge, significantY)), fill=self.SIGNIFICANT_COLOR, - width=self.SIGNIFICANT_WIDTH*zoom - #, clipX=(xLeftOffset, xLeftOffset + plotWidth-2) + width=self.SIGNIFICANT_WIDTH * zoom + # , clipX=(xLeftOffset, xLeftOffset + plotWidth-2) ) - sugg_coords = "%d, %d, %d, %d" % (start_pos_x, suggestiveY-2, rightEdge + 2*zoom, suggestiveY+2) - sig_coords = "%d, %d, %d, %d" % (start_pos_x, significantY-2, rightEdge + 2*zoom, significantY+2) + sugg_coords = "%d, %d, %d, %d" % ( + start_pos_x, suggestiveY - 2, rightEdge + 2 * zoom, suggestiveY + 2) + sig_coords = "%d, %d, %d, %d" % ( + start_pos_x, significantY - 2, rightEdge + 2 * zoom, significantY + 2) if self.LRS_LOD == 'LRS': sugg_title = "Suggestive LRS = %0.2f" % self.suggestive sig_title = "Significant LRS = %0.2f" % self.significant else: - sugg_title = "Suggestive LOD = %0.2f" % (self.suggestive/4.61) - sig_title = "Significant LOD = %0.2f" % (self.significant/4.61) + sugg_title = "Suggestive LOD = %0.2f" % ( + self.suggestive / 4.61) + sig_title = "Significant LOD = %0.2f" % ( + self.significant / 4.61) Areas1 = HtmlGenWrapper.create_area_tag( shape='rect', coords=sugg_coords, @@ -2279,24 +2419,28 @@ class DisplayMappingResults(object): gifmap.append(Areas1) gifmap.append(Areas2) - start_pos_x += (chr_length_dist+self.GraphInterval)*plotXScale + start_pos_x += (chr_length_dist + \ + self.GraphInterval) * plotXScale return start_pos_x for i, _chr in enumerate(self.genotype): if self.selectedChr != -1: if _chr.name == self.ChrList[self.selectedChr][0]: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[0]) break else: continue else: - startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i]) + startPosX = add_suggestive_significant_lines_and_legend( + startPosX, self.ChrLengthDistList[i]) if self.multipleInterval: lrsEdgeWidth = 1 else: if self.additiveChecked: - additiveMax = max([abs(X['additive']) for X in self.qtlresults]) + additiveMax = max([abs(X['additive']) + for X in self.qtlresults]) lrsEdgeWidth = 3 if zoom == 2: @@ -2306,7 +2450,8 @@ class DisplayMappingResults(object): AdditiveCoordXY = [] DominanceCoordXY = [] - symbolFont = ImageFont.truetype(font=FNT_BS_FILE, size=5) #ZS: For Manhattan Plot + symbolFont = ImageFont.truetype( + font=FNT_BS_FILE, size=5) # ZS: For Manhattan Plot previous_chr = 1 previous_chr_as_int = 0 @@ -2332,128 +2477,142 @@ class DisplayMappingResults(object): minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) LRSCoordXY = [] AdditiveCoordXY = [] previous_chr = qtlresult['chr'] previous_chr_as_int += 1 - newStartPosX = (self.ChrLengthDistList[previous_chr_as_int - 1]+self.GraphInterval)*plotXScale + newStartPosX = ( + self.ChrLengthDistList[previous_chr_as_int - 1] + self.GraphInterval) * plotXScale if newStartPosX != oldStartPosX: startPosX += newStartPosX oldStartPosX = newStartPosX - #ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used + # ZS: This is because the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used this_chr = str(self.ChrList[self.selectedChr][0]) if self.plotScale != "physic": - this_chr = str(self.ChrList[self.selectedChr][1]+1) + this_chr = str(self.ChrList[self.selectedChr][1] + 1) if self.selectedChr == -1 or str(qtlresult['chr']) == this_chr: if self.plotScale != "physic" and self.mapping_method == "reaper" and not self.manhattan_plot: - Xc = startPosX + (qtlresult['cM']-startMb)*plotXScale + Xc = startPosX + (qtlresult['cM'] - startMb) * plotXScale if hasattr(self.genotype, "filler"): if self.genotype.filler: if self.selectedChr != -1: start_cm = self.genotype[self.selectedChr - 1][0].cM - Xc = startPosX + (qtlresult['Mb'] - start_cm)*plotXScale + Xc = startPosX + \ + (qtlresult['Mb'] - start_cm) * plotXScale else: start_cm = self.genotype[previous_chr_as_int][0].cM - Xc = startPosX + ((qtlresult['Mb']-start_cm-startMb)*plotXScale)*(((qtlresult['Mb']-start_cm-startMb)*plotXScale)/((qtlresult['Mb']-start_cm-startMb+self.GraphInterval)*plotXScale)) + Xc = startPosX + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) * ( + ((qtlresult['Mb'] - start_cm - startMb) * plotXScale) / ((qtlresult['Mb'] - start_cm - startMb + self.GraphInterval) * plotXScale)) else: - Xc = startPosX + (qtlresult['Mb']-startMb)*plotXScale + Xc = startPosX + (qtlresult['Mb'] - startMb) * plotXScale # updated by NL 06-18-2011: # fix the over limit LRS graph issue since genotype trait may give infinite LRS; # for any lrs is over than 460(LRS max in this system), it will be reset to 460 - yLRS = yZero - (item/LRS_LOD_Max) * LRSHeightThresh - + yLRS = yZero - (item / LRS_LOD_Max) * LRSHeightThresh if 'lrs_value' in qtlresult: if self.LRS_LOD == "LOD" or self.LRS_LOD == "-logP": - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - webqtlConfig.MAXLRS * \ + LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRSAxisList[-1]*self.LODFACTOR) - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/(LRS_LOD_Max*self.LODFACTOR) + Yc = yZero - \ + qtlresult['lrs_value'] * LRSHeightThresh / \ + (LRS_LOD_Max * self.LODFACTOR) else: - if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value']=='inf': + if qtlresult['lrs_value'] > 460 or qtlresult['lrs_value'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lrs_value']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lrs_value'] * \ + LRSHeightThresh / LRS_LOD_Max else: - if qtlresult['lod_score'] > 100 or qtlresult['lod_score']=='inf': + if qtlresult['lod_score'] > 100 or qtlresult['lod_score'] == 'inf': #Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - webqtlConfig.MAXLRS*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - webqtlConfig.MAXLRS * LRSHeightThresh / LRS_LOD_Max else: if self.LRS_LOD == "LRS": #Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*self.LODFACTOR*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * self.LODFACTOR * \ + LRSHeightThresh / LRS_LOD_Max else: #Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRSAxisList[-1] - Yc = yZero - qtlresult['lod_score']*LRSHeightThresh/LRS_LOD_Max + Yc = yZero - \ + qtlresult['lod_score'] * \ + LRSHeightThresh / LRS_LOD_Max if self.manhattan_plot == True: if self.color_scheme == "single": @@ -2469,17 +2628,19 @@ class DisplayMappingResults(object): im_drawer.text( text="5", xy=( - Xc-im_drawer.textsize("5", font=symbolFont)[0]/2+1, - Yc-4), + Xc - im_drawer.textsize("5", + font=symbolFont)[0] / 2 + 1, + Yc - 4), fill=point_color, font=symbolFont) else: LRSCoordXY.append((Xc, Yc)) if not self.multipleInterval and self.additiveChecked: - if additiveMax == 0.0: - additiveMax = 0.000001 - Yc = yZero - qtlresult['additive']*AdditiveHeightThresh/additiveMax - AdditiveCoordXY.append((Xc, Yc)) + if additiveMax == 0.0: + additiveMax = 0.000001 + Yc = yZero - qtlresult['additive'] * \ + AdditiveHeightThresh / additiveMax + AdditiveCoordXY.append((Xc, Yc)) m += 1 @@ -2497,64 +2658,65 @@ class DisplayMappingResults(object): minusColor = self.ADDITIVE_COLOR_NEGATIVE for k, aPoint in enumerate(AdditiveCoordXY): if k > 0: - Xc0, Yc0 = AdditiveCoordXY[k-1] + Xc0, Yc0 = AdditiveCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero-(Yc0-yZero)), - (Xc, yZero-(Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) if not self.multipleInterval and INTERCROSS and self.dominanceChecked: @@ -2562,114 +2724,116 @@ class DisplayMappingResults(object): minusColor = self.DOMINANCE_COLOR_NEGATIVE for k, aPoint in enumerate(DominanceCoordXY): if k > 0: - Xc0, Yc0 = DominanceCoordXY[k-1] + Xc0, Yc0 = DominanceCoordXY[k - 1] Xc, Yc = aPoint - if (Yc0-yZero)*(Yc-yZero) < 0: - if Xc == Xc0: #genotype , locus distance is 0 + if (Yc0 - yZero) * (Yc - yZero) < 0: + if Xc == Xc0: # genotype , locus distance is 0 Xcm = Xc else: - Xcm = (yZero-Yc0)/((Yc-Yc0)/(Xc-Xc0)) +Xc0 + Xcm = (yZero - Yc0) / \ + ((Yc - Yc0) / (Xc - Xc0)) + Xc0 if Yc0 < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xcm, yZero)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( - xy=((Xcm, yZero), (Xc, yZero-(Yc-yZero))), + xy=((Xcm, yZero), (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), (Xcm, yZero)), + xy=((Xc0, yZero - (Yc0 - yZero)), (Xcm, yZero)), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) im_drawer.line( xy=((Xcm, yZero), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - elif (Yc0-yZero)*(Yc-yZero) > 0: + elif (Yc0 - yZero) * (Yc - yZero) > 0: if Yc < yZero: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: - minYc = min(Yc-yZero, Yc0-yZero) + minYc = min(Yc - yZero, Yc0 - yZero) if minYc < 0: im_drawer.line( xy=((Xc0, Yc0), (Xc, Yc)), fill=plusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) else: im_drawer.line( - xy=((Xc0, yZero - (Yc0-yZero)), - (Xc, yZero - (Yc-yZero))), fill=minusColor, + xy=((Xc0, yZero - (Yc0 - yZero)), + (Xc, yZero - (Yc - yZero))), fill=minusColor, width=lineWidth - #, clipX=(xLeftOffset, xLeftOffset + plotWidth) + # , clipX=(xLeftOffset, xLeftOffset + plotWidth) ) - - ###draw additive scale + # draw additive scale if not self.multipleInterval and self.additiveChecked: - additiveScaleFont=ImageFont.truetype(font=VERDANA_FILE, size=16*zoom) + additiveScaleFont = ImageFont.truetype( + font=VERDANA_FILE, size=16 * zoom) additiveScale = Plot.detScaleOld(0, additiveMax) - additiveStep = (additiveScale[1]-additiveScale[0])/additiveScale[2] + additiveStep = (additiveScale[1] - \ + additiveScale[0]) / additiveScale[2] additiveAxisList = Plot.frange(0, additiveScale[1], additiveStep) - addPlotScale = AdditiveHeightThresh/additiveMax + addPlotScale = AdditiveHeightThresh / additiveMax TEXT_Y_DISPLACEMENT = -8 additiveAxisList.append(additiveScale[1]) for item in additiveAxisList: - additiveY = yZero - item*addPlotScale + additiveY = yZero - item * addPlotScale im_drawer.line( xy=((xLeftOffset + plotWidth, additiveY), - (xLeftOffset+4+ plotWidth, additiveY)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + (xLeftOffset + 4 + plotWidth, additiveY)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) scaleStr = "%2.3f" % item im_drawer.text( text=scaleStr, - xy=(xLeftOffset + plotWidth +6, additiveY+TEXT_Y_DISPLACEMENT), + xy=(xLeftOffset + plotWidth + 6, + additiveY + TEXT_Y_DISPLACEMENT), font=additiveScaleFont, fill=self.ADDITIVE_COLOR_POSITIVE) im_drawer.line( - xy=((xLeftOffset+plotWidth, additiveY), - (xLeftOffset+plotWidth, yZero)), - fill=self.ADDITIVE_COLOR_POSITIVE, width=1*zoom) + xy=((xLeftOffset + plotWidth, additiveY), + (xLeftOffset + plotWidth, yZero)), + fill=self.ADDITIVE_COLOR_POSITIVE, width=1 * zoom) im_drawer.line( - xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30*(zoom - 1))), - fill=self.LRS_COLOR, width=1*zoom) #the blue line running up the y axis - - - def drawGraphBackground(self, canvas, gifmap, offset= (80, 120, 80, 50), zoom = 1, startMb = None, endMb = None): - ##conditions - ##multiple Chromosome view - ##single Chromosome Physical - ##single Chromosome Genetic + xy=((xLeftOffset, yZero), (xLeftOffset, yTopOffset + 30 * (zoom - 1))), + fill=self.LRS_COLOR, width=1 * zoom) # the blue line running up the y axis + + def drawGraphBackground(self, canvas, gifmap, offset=(80, 120, 80, 50), zoom=1, startMb=None, endMb=None): + # conditions + # multiple Chromosome view + # single Chromosome Physical + # single Chromosome Genetic im_drawer = ImageDraw.Draw(canvas) xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset plotWidth = canvas.size[0] - xLeftOffset - xRightOffset plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - yBottom = yTopOffset+plotHeight + yBottom = yTopOffset + plotHeight fontZoom = zoom if zoom == 2: fontZoom = 1.5 yTopOffset += 30 - #calculate plot scale + # calculate plot scale if self.plotScale != 'physic': self.ChrLengthDistList = self.ChrLengthCMList drawRegionDistance = self.ChrLengthCMSum @@ -2677,10 +2841,10 @@ class DisplayMappingResults(object): self.ChrLengthDistList = self.ChrLengthMbList drawRegionDistance = self.ChrLengthMbSum - if self.selectedChr > -1: #single chromosome view - spacingAmt = plotWidth/13.5 + if self.selectedChr > -1: # single chromosome view + spacingAmt = plotWidth / 13.5 i = 0 - for startPix in Plot.frange(xLeftOffset, xLeftOffset+plotWidth, spacingAmt): + for startPix in Plot.frange(xLeftOffset, xLeftOffset + plotWidth, spacingAmt): if (i % 2 == 0): theBackColor = self.GRAPH_BACK_DARK_COLOR else: @@ -2688,25 +2852,27 @@ class DisplayMappingResults(object): i += 1 im_drawer.rectangle( [(startPix, yTopOffset), - (min(startPix+spacingAmt, xLeftOffset+plotWidth), yBottom)], + (min(startPix + spacingAmt, xLeftOffset + plotWidth), yBottom)], outline=theBackColor, fill=theBackColor) drawRegionDistance = self.ChrLengthDistList[self.ChrList[self.selectedChr][1]] self.ChrLengthDistList = [drawRegionDistance] if self.plotScale == 'physic': - plotXScale = plotWidth / (endMb-startMb) + plotXScale = plotWidth / (endMb - startMb) else: plotXScale = plotWidth / drawRegionDistance - else: #multiple chromosome view - plotXScale = plotWidth / ((len(self.genotype)-1)*self.GraphInterval + drawRegionDistance) + else: # multiple chromosome view + plotXScale = plotWidth / \ + ((len(self.genotype) - 1) * self.GraphInterval + drawRegionDistance) startPosX = xLeftOffset if fontZoom == 1.5: chrFontZoom = 2 else: chrFontZoom = 1 - chrLabelFont=ImageFont.truetype(font=VERDANA_FILE, size=24*chrFontZoom) + chrLabelFont = ImageFont.truetype( + font=VERDANA_FILE, size=24 * chrFontZoom) for i, _chr in enumerate(self.genotype): if (i % 2 == 0): @@ -2714,23 +2880,27 @@ class DisplayMappingResults(object): else: theBackColor = self.GRAPH_BACK_LIGHT_COLOR - #draw the shaded boxes and the sig/sug thick lines + # draw the shaded boxes and the sig/sug thick lines im_drawer.rectangle( ((startPosX, yTopOffset), - (startPosX + self.ChrLengthDistList[i]*plotXScale, yBottom)), + (startPosX + self.ChrLengthDistList[i] * plotXScale, yBottom)), outline=GAINSBORO, fill=theBackColor) - chrNameWidth, chrNameHeight = im_drawer.textsize(_chr.name, font=chrLabelFont) - chrStartPix = startPosX + (self.ChrLengthDistList[i]*plotXScale -chrNameWidth)/2 - chrEndPix = startPosX + (self.ChrLengthDistList[i]*plotXScale +chrNameWidth)/2 + chrNameWidth, chrNameHeight = im_drawer.textsize( + _chr.name, font=chrLabelFont) + chrStartPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale - chrNameWidth) / 2 + chrEndPix = startPosX + \ + (self.ChrLengthDistList[i] * plotXScale + chrNameWidth) / 2 TEXT_Y_DISPLACEMENT = 0 im_drawer.text(xy=(chrStartPix, yTopOffset + TEXT_Y_DISPLACEMENT), text=_chr.name, font=chrLabelFont, fill=BLACK) - COORDS = "%d,%d,%d,%d" %(chrStartPix, yTopOffset, chrEndPix, yTopOffset +20) + COORDS = "%d,%d,%d,%d" % ( + chrStartPix, yTopOffset, chrEndPix, yTopOffset + 20) - #add by NL 09-03-2010 + # add by NL 09-03-2010 HREF = "javascript:chrView(%d,%s);" % (i, self.ChrLengthMbList) #HREF = "javascript:changeView(%d,%s);" % (i,self.ChrLengthMbList) Areas = HtmlGenWrapper.create_area_tag( @@ -2738,7 +2908,8 @@ class DisplayMappingResults(object): coords=COORDS, href=HREF) gifmap.append(Areas) - startPosX += (self.ChrLengthDistList[i]+self.GraphInterval)*plotXScale + startPosX += (self.ChrLengthDistList[i] + \ + self.GraphInterval) * plotXScale return plotXScale @@ -2748,15 +2919,16 @@ class DisplayMappingResults(object): ######################################### myCanvas = Image.new("RGBA", size=(500, 300)) if 'lod_score' in self.qtlresults[0] and self.LRS_LOD == "LRS": - perm_output = [value*4.61 for value in self.perm_output] + perm_output = [value * 4.61 for value in self.perm_output] elif 'lod_score' not in self.qtlresults[0] and self.LRS_LOD == "LOD": - perm_output = [value/4.61 for value in self.perm_output] + perm_output = [value / 4.61 for value in self.perm_output] else: perm_output = self.perm_output - filename= webqtlUtil.genRandStr("Reg_") - Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test') - myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR+filename), + filename = webqtlUtil.genRandStr("Reg_") + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, + YLabel='Frequency', title=' Histogram of Permutation Test') + myCanvas.save("{}.gif".format(GENERATED_IMAGE_DIR + filename), format='gif') return filename @@ -2775,16 +2947,16 @@ class DisplayMappingResults(object): if self.dataset.group.species == "mouse": if refGene: gene_table_header_list = ["Index", - "Symbol", - "Mb Start", - "Length (Kb)", - "SNP Count", - "SNP Density", - "Avg Expr", - "Human Chr", - "Mb Start (hg19)", - "Literature Correlation", - "Gene Description"] + "Symbol", + "Mb Start", + "Length (Kb)", + "SNP Count", + "SNP Density", + "Avg Expr", + "Human Chr", + "Mb Start (hg19)", + "Literature Correlation", + "Gene Description"] else: gene_table_header_list = ["", "Index", @@ -2821,20 +2993,21 @@ class DisplayMappingResults(object): tableIterationsCnt = tableIterationsCnt + 1 - this_row = [] #container for the cells of each row + this_row = [] # container for the cells of each row selectCheck = HtmlGenWrapper.create_input_tag( type_="checkbox", name="selectCheck", value=theGO["GeneSymbol"], Class="checkbox trait_checkbox") # checkbox for each row - geneLength = (theGO["TxEnd"] - theGO["TxStart"])*1000.0 - tenPercentLength = geneLength*0.0001 + geneLength = (theGO["TxEnd"] - theGO["TxStart"]) * 1000.0 + tenPercentLength = geneLength * 0.0001 txStart = theGO["TxStart"] txEnd = theGO["TxEnd"] - theGO["snpDensity"] = theGO["snpCount"]/geneLength + theGO["snpDensity"] = theGO["snpCount"] / geneLength if self.ALEX_DEBUG_BOOL_PRINT_GENE_LIST: - geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO["GeneID"] + geneIdString = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s' % theGO[ + "GeneID"] if theGO["snpCount"]: snpString = HT.Link( @@ -2844,16 +3017,18 @@ class DisplayMappingResults(object): f"end={theGO['TxEnd']}&" f"geneName={theGO['GeneSymbol']}&" f"s1={self.diffCol[0]}&s2=%d"), - str(theGO["snpCount"]) # The text to display + str(theGO["snpCount"]) # The text to display ) snpString.set_blank_target() snpString.set_attribute("class", "normalsize") else: snpString = 0 - mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str(int(theGO["TxEnd"]*1000000.0)) +"&pix=620&Submit=submit" + mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm9&position=chr" + \ + theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: if theGO['humanGene']["TxStart"] == '': humanStartDisplay = "" @@ -2863,20 +3038,21 @@ class DisplayMappingResults(object): humanChr = theGO['humanGene']["Chromosome"] humanTxStart = theGO['humanGene']["TxStart"] - humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % (humanChr, int(1000000*theGO['humanGene']["TxStart"]), int(1000000*theGO['humanGene']["TxEnd"])) + humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( + humanChr, int(1000000 * theGO['humanGene']["TxStart"]), int(1000000 * theGO['humanGene']["TxEnd"])) else: humanStartString = humanChr = humanStartDisplay = "--" geneDescription = theGO["GeneDescription"] if len(geneDescription) > 70: - geneDescription = geneDescription[:70]+"..." + geneDescription = geneDescription[:70] + "..." if theGO["snpDensity"] < 0.000001: snpDensityStr = "0" else: snpDensityStr = "%0.6f" % theGO["snpDensity"] - avgExpr = [] #theGO["avgExprVal"] + avgExpr = [] # theGO["avgExprVal"] if avgExpr in ([], None): avgExpr = "--" else: @@ -2888,7 +3064,8 @@ class DisplayMappingResults(object): else: chr_as_int = int(theGO["Chromosome"]) - 1 if refGene: - literatureCorrelationString = str(self.getLiteratureCorrelation(self.cursor, refGene, theGO['GeneID']) or "N/A") + literatureCorrelationString = str(self.getLiteratureCorrelation( + self.cursor, refGene, theGO['GeneID']) or "N/A") this_row = [selectCheck.__str__(), str(tableIterationsCnt), @@ -2896,17 +3073,17 @@ class DisplayMappingResults(object): geneIdString, theGO["GeneSymbol"], target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( mouseStartString, "{:.6f}".format(txStart), target="_blank") - ), + ), str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2931,8 +3108,8 @@ class DisplayMappingResults(object): str(HtmlGenWrapper.create_link_tag( "javascript:rangeView('{}', {:f}, {:f})".format( str(chr_as_int), - txStart-tenPercentLength, - txEnd+tenPercentLength), + txStart - tenPercentLength, + txEnd + tenPercentLength), "{:.3f}".format(geneLength))), snpString, snpDensityStr, @@ -2956,7 +3133,8 @@ class DisplayMappingResults(object): if theGO["GeneID"] != "": geneSymbolNCBI = str(HtmlGenWrapper.create_link_tag( - "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format(theGO["GeneID"]), + "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids={}".format( + theGO["GeneID"]), theGO["GeneSymbol"], Class="normalsize", target="_blank")) @@ -2969,7 +3147,8 @@ class DisplayMappingResults(object): chr_as_int = int(theGO["Chromosome"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float(theGO["TxStart"])-(geneLength*0.1), float(theGO["TxEnd"])+(geneLength*0.1)) + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float( + theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) avgExprVal = [] if avgExprVal != "" and avgExprVal: @@ -2977,14 +3156,14 @@ class DisplayMappingResults(object): else: avgExprVal = "" - #Mouse Gene + # Mouse Gene if theGO['mouseGene']: mouseChr = theGO['mouseGene']["Chromosome"] mouseTxStart = "%0.6f" % theGO['mouseGene']["TxStart"] else: mouseChr = mouseTxStart = "" - #the chromosomes for human 1 are 1qXX.XX + # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: humanChr = theGO['humanGene']["Chromosome"] humanTxStart = "%0.6f" % theGO['humanGene']["TxStart"] @@ -2996,12 +3175,12 @@ class DisplayMappingResults(object): geneDesc = "" this_row = [selectCheck.__str__(), - str(gIndex+1), + str(gIndex + 1), geneSymbolNCBI, "%0.6f" % theGO["TxStart"], str(HtmlGenWrapper.create_link_tag( geneLengthURL, - "{:.3f}".format(geneLength*1000.0))), + "{:.3f}".format(geneLength * 1000.0))), avgExprVal, mouseChr, mouseTxStart, @@ -3013,7 +3192,7 @@ class DisplayMappingResults(object): return gene_table_body - def getLiteratureCorrelation(cursor,geneId1=None,geneId2=None): + def getLiteratureCorrelation(cursor, geneId1=None, geneId2=None): if not geneId1 or not geneId2: return None if geneId1 == geneId2: @@ -3025,9 +3204,10 @@ class DisplayMappingResults(object): query = 'SELECT Value FROM LCorrRamin3 WHERE GeneId1 = %s and GeneId2 = %s' for x, y in [(geneId1, geneId2), (geneId2, geneId1)]: cursor.execute(query, (x, y)) - lCorr = cursor.fetchone() + lCorr = cursor.fetchone() if lCorr: lCorr = lCorr[0] break - except: raise #lCorr = None + except: + raise # lCorr = None return lCorr diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 83ebcdf9..f88c5ac8 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -146,7 +146,13 @@ def gen_covariates_file(this_dataset, covariates, samples): for covariate in covariate_list: this_covariate_data = [] trait_name = covariate.split(":")[0] - dataset_ob = create_dataset(covariate.split(":")[1]) + dataset_name = covariate.split(":")[1] + if dataset_name == "Temp": + temp_group = trait_name.split("_")[2] + dataset_ob = create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=temp_group) + else: + dataset_ob = create_dataset(covariate.split(":")[1]) trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) diff --git a/wqflask/wqflask/marker_regression/plink_mapping.py b/wqflask/wqflask/marker_regression/plink_mapping.py index 5d675c38..2fa80841 100644 --- a/wqflask/wqflask/marker_regression/plink_mapping.py +++ b/wqflask/wqflask/marker_regression/plink_mapping.py @@ -6,13 +6,14 @@ from utility import webqtlUtil from utility.tools import flat_files, PLINK_COMMAND import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_plink(this_trait, dataset, species, vals, maf): - plink_output_filename = webqtlUtil.genRandStr(f"{dataset.group.name}_{this_trait.name}_") + plink_output_filename = webqtlUtil.genRandStr( + f"{dataset.group.name}_{this_trait.name}_") gen_pheno_txt_file(dataset, vals) - plink_command = f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc " logger.debug("plink_command:", plink_command) @@ -25,6 +26,7 @@ def run_plink(this_trait, dataset, species, vals, maf): return dataset.group.markers.markers + def gen_pheno_txt_file(this_dataset, vals): """Generates phenotype file for GEMMA/PLINK""" @@ -34,15 +36,17 @@ def gen_pheno_txt_file(this_dataset, vals): split_line = line.split() current_file_data.append(split_line) - with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam","w") as outfile: + with open(f"{flat_files('mapping')}/{this_dataset.group.name}.fam", "w") as outfile: for i, line in enumerate(current_file_data): if vals[i] == "x": this_val = -9 else: this_val = vals[i] - outfile.write("0 " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + outfile.write("0 " + line[1] + " " + line[2] + " " + \ + line[3] + " " + line[4] + " " + str(this_val) + "\n") + -def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): +def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename=''): ped_sample_list = get_samples_from_ped_file(dataset) output_file = open(f"{TMPDIR}{pheno_filename}.txt", "wb") header = f"FID\tIID\t{this_trait.name}\n" @@ -50,7 +54,7 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_value_list = [] - #if valueDict does not include some strain, value will be set to -9999 as missing value + # if valueDict does not include some strain, value will be set to -9999 as missing value for i, sample in enumerate(ped_sample_list): try: value = vals[i] @@ -63,11 +67,11 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): new_line = '' for i, sample in enumerate(ped_sample_list): - j = i+1 + j = i + 1 value = new_value_list[i] new_line += f"{sample}\t{sample}\t{value}\n" - if j%1000 == 0: + if j % 1000 == 0: output_file.write(newLine) new_line = '' @@ -77,10 +81,12 @@ def gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = ''): output_file.close() # get strain name from ped file in order + + def get_samples_from_ped_file(dataset): - ped_file= open(f"{flat_files('mapping')}{dataset.group.name}.ped","r") + ped_file = open(f"{flat_files('mapping')}{dataset.group.name}.ped", "r") line = ped_file.readline() - sample_list=[] + sample_list = [] while line: lineList = line.strip().split('\t') @@ -93,25 +99,26 @@ def get_samples_from_ped_file(dataset): return sample_list + def parse_plink_output(output_filename, species): - plink_results={} + plink_results = {} threshold_p_value = 1 - result_fp = open(f"{TMPDIR}{output_filename}.qassoc","rb") + result_fp = open(f"{TMPDIR}{output_filename}.qassoc", "rb") line = result_fp.readline() - value_list = [] # initialize value list, this list will include snp, bp and pvalue info + value_list = [] # initialize value list, this list will include snp, bp and pvalue info p_value_dict = {} count = 0 while line: - #convert line from str to list + # convert line from str to list line_list = build_line_list(line=line) # only keep the records whose chromosome name is in db - if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip()!='NA': + if int(line_list[0]) in species.chromosomes.chromosomes and line_list[-1] and line_list[-1].strip() != 'NA': chr_name = species.chromosomes.chromosomes[int(line_list[0])] snp = line_list[1] @@ -125,7 +132,7 @@ def parse_plink_output(output_filename, species): value_list = plink_results[chr_name] # pvalue range is [0,1] - if threshold_p_value >=0 and threshold_p_value <= 1: + if threshold_p_value >= 0 and threshold_p_value <= 1: if p_value < threshold_p_value: value_list.append((snp, BP, p_value)) count += 1 @@ -141,7 +148,7 @@ def parse_plink_output(output_filename, species): if value_list: plink_results[chr_name] = value_list - value_list=[] + value_list = [] line = result_fp.readline() else: @@ -154,9 +161,12 @@ def parse_plink_output(output_filename, species): # function: convert line from str to list; # output: lineList list ####################################################### + + def build_line_list(line=""): - line_list = line.strip().split(' ')# irregular number of whitespaces between columns - line_list = [item for item in line_list if item !=''] + # irregular number of whitespaces between columns + line_list = line.strip().split(' ') + line_list = [item for item in line_list if item != ''] line_list = [item.strip() for item in line_list] return line_list diff --git a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py index 8341ee55..4d6715ba 100644 --- a/wqflask/wqflask/marker_regression/qtlreaper_mapping.py +++ b/wqflask/wqflask/marker_regression/qtlreaper_mapping.py @@ -1,4 +1,9 @@ -import os, math, string, random, json, re +import os +import math +import string +import random +import json +import re from base import webqtlConfig from base.trait import GeneralTrait @@ -6,7 +11,8 @@ from base.data_set import create_dataset from utility.tools import flat_files, REAPER_COMMAND, TEMPDIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None): """Generates p-values for each marker using qtlreaper""" @@ -17,66 +23,73 @@ def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boo else: genofile_name = this_dataset.group.name - trait_filename =f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" + trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" gen_pheno_txt_file(samples, vals, trait_filename) - output_filename = (f"{this_dataset.group.name}_GWA_"+ - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + output_filename = (f"{this_dataset.group.name}_GWA_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) bootstrap_filename = None permu_filename = None opt_list = [] if boot_check and num_bootstrap > 0: - bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + - ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - ) + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) opt_list.append("-b") opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") - opt_list.append(f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") if num_perm > 0: - permu_filename =("{this_dataset.group.name}_PERM_" + - ''.join(random.choice(string.ascii_uppercase + - string.digits) for _ in range(6)) - ) + permu_filename = ("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) opt_list.append("-n " + str(num_perm)) - opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") if control_marker != "" and do_control == "true": opt_list.append("-c " + control_marker) if manhattan_plot != True: opt_list.append("--interval 1") - reaper_command = (REAPER_COMMAND + - ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), - genofile_name, - TEMPDIR, - trait_filename, - " ".join(opt_list), - webqtlConfig.GENERATED_IMAGE_DIR, - output_filename)) + genofile_name, + TEMPDIR, + trait_filename, + " ".join( + opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) logger.debug("reaper_command:" + reaper_command) os.system(reaper_command) else: output_filename, permu_filename, bootstrap_filename = output_files - marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(output_filename, permu_filename, bootstrap_filename) + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) suggestive = 0 significant = 0 if len(permu_vals) > 0: - suggestive = permu_vals[int(num_perm*0.37-1)] - significant = permu_vals[int(num_perm*0.95-1)] + suggestive = permu_vals[int(num_perm * 0.37 - 1)] + significant = permu_vals[int(num_perm * 0.95 - 1)] + + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) - return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, - [output_filename, permu_filename, bootstrap_filename]) def gen_pheno_txt_file(samples, vals, trait_filename): """Generates phenotype file for GEMMA""" - with open(f"{TEMPDIR}/gn2/{trait_filename}.txt","w") as outfile: + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: outfile.write("Trait\t") filtered_sample_list = [] @@ -92,6 +105,7 @@ def gen_pheno_txt_file(samples, vals, trait_filename): values_string = "\t".join(filtered_vals_list) outfile.write(values_string) + def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): included_markers = [] p_values = [] @@ -121,7 +135,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['cM'] = float(line.split("\t")[3]) else: if float(line.split("\t")[3]) > 1000: - marker['Mb'] = float(line.split("\t")[3])/1000000 + marker['Mb'] = float(line.split("\t")[3]) / 1000000 else: marker['Mb'] = float(line.split("\t")[3]) if float(line.split("\t")[6]) != 1: @@ -132,7 +146,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): else: marker['cM'] = float(line.split("\t")[3]) if float(line.split("\t")[4]) > 1000: - marker['Mb'] = float(line.split("\t")[4])/1000000 + marker['Mb'] = float(line.split("\t")[4]) / 1000000 else: marker['Mb'] = float(line.split("\t")[4]) if float(line.split("\t")[7]) != 1: @@ -142,7 +156,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): marker['additive'] = float(line.split("\t")[6]) marker_obs.append(marker) - #ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc + # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc sorted_indices = natural_sort(marker_obs) permu_vals = [] @@ -163,6 +177,7 @@ def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): return marker_obs, permu_vals, bootstrap_vals + def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_data, num_perm, bootCheck, num_bootstrap, do_control, control_marker, manhattan_plot): genotype = dataset.group.read_genotype_file(use_reaper=True) @@ -185,18 +200,19 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da suggestive = 0 significant = 0 else: - perm_output = genotype.permutation(strains = trimmed_samples, trait = trimmed_values, nperm=num_perm) - suggestive = perm_output[int(num_perm*0.37-1)] - significant = perm_output[int(num_perm*0.95-1)] - #highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case + perm_output = genotype.permutation( + strains=trimmed_samples, trait=trimmed_values, nperm=num_perm) + suggestive = perm_output[int(num_perm * 0.37 - 1)] + significant = perm_output[int(num_perm * 0.95 - 1)] + # highly_significant = perm_output[int(num_perm*0.99-1)] #ZS: Currently not used, but leaving it here just in case json_data['suggestive'] = suggestive json_data['significant'] = significant if control_marker != "" and do_control == "true": - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values, - control = str(control_marker)) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values, + control=str(control_marker)) if bootCheck: control_geno = [] control_geno2 = [] @@ -215,31 +231,31 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da _idx = _prgy.index(_strain) control_geno.append(control_geno2[_idx]) - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - control = control_geno, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + control=control_geno, + nboot=num_bootstrap) else: - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values) + reaper_results = genotype.regression(strains=trimmed_samples, + trait=trimmed_values) if bootCheck: - bootstrap_results = genotype.bootstrap(strains = trimmed_samples, - trait = trimmed_values, - nboot = num_bootstrap) + bootstrap_results = genotype.bootstrap(strains=trimmed_samples, + trait=trimmed_values, + nboot=num_bootstrap) json_data['chr'] = [] json_data['pos'] = [] json_data['lod.hk'] = [] json_data['markernames'] = [] - #if self.additive: + # if self.additive: # self.json_data['additive'] = [] - #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary + # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary qtl_results = [] for qtl in reaper_results: reaper_locus = qtl.locus - #ZS: Convert chr to int + # ZS: Convert chr to int converted_chr = reaper_locus.chr if reaper_locus.chr != "X" and reaper_locus.chr != "X/Y": converted_chr = int(reaper_locus.chr) @@ -247,19 +263,22 @@ def run_original_reaper(this_trait, dataset, samples_before, trait_vals, json_da json_data['pos'].append(reaper_locus.Mb) json_data['lod.hk'].append(qtl.lrs) json_data['markernames'].append(reaper_locus.name) - #if self.additive: + # if self.additive: # self.json_data['additive'].append(qtl.additive) - locus = {"name":reaper_locus.name, "chr":reaper_locus.chr, "cM":reaper_locus.cM, "Mb":reaper_locus.Mb} - qtl = {"lrs_value": qtl.lrs, "chr":converted_chr, "Mb":reaper_locus.Mb, - "cM":reaper_locus.cM, "name":reaper_locus.name, "additive":qtl.additive, "dominance":qtl.dominance} + locus = {"name": reaper_locus.name, "chr": reaper_locus.chr, + "cM": reaper_locus.cM, "Mb": reaper_locus.Mb} + qtl = {"lrs_value": qtl.lrs, "chr": converted_chr, "Mb": reaper_locus.Mb, + "cM": reaper_locus.cM, "name": reaper_locus.name, "additive": qtl.additive, "dominance": qtl.dominance} qtl_results.append(qtl) return qtl_results, json_data, perm_output, suggestive, significant, bootstrap_results + def natural_sort(marker_list): """ Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results """ convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', str(marker_list[key]['chr'])) ] - return sorted(list(range(len(marker_list))), key = alphanum_key)
\ No newline at end of file + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] + return sorted(list(range(len(marker_list))), key=alphanum_key) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 4117a0e5..09afb8d1 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -1,405 +1,133 @@ -import rpy2.robjects as ro -import rpy2.robjects.numpy2ri as np2r -import numpy as np -import json +import csv +import hashlib +import io +import requests +import shutil +from typing import Dict +from typing import List +from typing import Optional +from typing import TextIO -from flask import g +import numpy as np from base.webqtlConfig import TMPDIR from base.trait import create_trait -from base.data_set import create_dataset -from utility import webqtlUtil -from utility.tools import locate, TEMPDIR -from flask import g +from utility.tools import locate import utility.logger -logger = utility.logger.getLogger(__name__ ) - -# Get a trait's type (numeric, categorical, etc) from the DB -def get_trait_data_type(trait_db_string): - logger.info("get_trait_data_type"); - the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - logger.info("the_query done"); - results_json = g.db.execute(the_query).fetchone() - logger.info("the_query executed"); - results_ob = json.loads(results_json[0]) - logger.info("json results loaded"); - if trait_db_string in results_ob: - logger.info("found"); - return results_ob[trait_db_string] - else: - logger.info("not found"); - return "numeric" - - -# Run qtl mapping using R/qtl -def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): - logger.info("Start run_rqtl_geno"); - ## Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - plot = ro.r["plot"] # Map the plot function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function - - print((r_library("qtl"))) # Load R/qtl +logger = utility.logger.getLogger(__name__) - logger.info("QTL library loaded"); +GN3_RQTL_URL = "http://localhost:8086/api/rqtl/compute" +GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp" - ## Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function +def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors): + """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)""" - crossname = dataset.group.name - #try: - # generate_cross_from_rdata(dataset) - # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function - # genofilelocation = locate(crossname + ".RData", "genotype/rdata") - # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function - #except: - - if mapping_scale == "morgan": - scale_units = "cM" - else: - scale_units = "Mb" - - generate_cross_from_geno(dataset, scale_units) - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function - crossfilelocation = TMPDIR + crossname + ".cross" + pheno_file = write_phenotype_file(trait_name, samples, vals, dataset, cofactors, perm_strata_list) if dataset.group.genofile: - genofilelocation = locate(dataset.group.genofile, "genotype") + geno_file = locate(dataset.group.genofile, "genotype") else: - genofilelocation = locate(dataset.group.name + ".geno", "genotype") - logger.info("Going to create a cross from geno"); - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) # TODO: Add the SEX if that is available - logger.info("before calc_genoprob"); - if manhattan_plot: - cross_object = calc_genoprob(cross_object) + geno_file = locate(dataset.group.name + ".geno", "genotype") + + post_data = { + "pheno_file": pheno_file, + "geno_file": geno_file, + "model": model, + "method": method, + "nperm": num_perm, + "scale": mapping_scale + } + + if do_control == "true" and control_marker: + post_data["control_marker"] = control_marker + + if not manhattan_plot: + post_data["interval"] = True + if cofactors: + post_data["addcovar"] = True + + if perm_strata_list: + post_data["pstrata"] = True + + rqtl_output = requests.post(GN3_RQTL_URL, data=post_data).json() + if num_perm > 0: + return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results'] else: - cross_object = calc_genoprob(cross_object, step=5, stepwidth="max") - logger.info("after calc_genoprob"); - pheno_string = sanitize_rqtl_phenotype(vals) - logger.info("phenostring done"); - names_string = sanitize_rqtl_names(samples) - logger.info("sanitized pheno and names"); - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype - cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype - logger.info("Added pheno and names"); - marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers - logger.info("Marker covars done"); - if cofactors != "": - logger.info("Cofactors: " + cofactors); - cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples) # Create the covariates from selected traits - ro.r('all_covars <- cbind(marker_covars, trait_covars)') - else: - ro.r('all_covars <- marker_covars') - covars = ro.r['all_covars'] - #DEBUG to save the session object to file - if pair_scan: - if do_control == "true": - logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16) - else: - logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16) + return rqtl_output['results'] - pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR+pair_scan_filename) - plot(result_data_frame) - dev_off() - return process_pair_scan_results(result_data_frame) - else: - if do_control == "true" or cofactors != "": - logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) - ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') - else: - logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) - - if num_perm > 0 and permCheck == "ON": # Do permutation (if requested by user) - if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping - cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list) - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method) - else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method) - else: - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method) - else: - perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method) - - perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame) # Functions that sets the thresholds for the webinterface - return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species) - else: - return process_rqtl_results(result_data_frame, dataset.group.species) - -def generate_cross_from_rdata(dataset): - rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") - ro.r(""" - generate_cross_from_rdata <- function(filename = '%s') { - load(file=filename) - cross = cunique - return(cross) - } - """ % (rdata_location)) - -def generate_cross_from_geno(dataset, scale_units): # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly +def get_hash_of_textio(the_file: TextIO) -> str: + """Given a StringIO, return the hash of its contents""" - ro.r(""" - trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } - getGenoCode <- function(header, name = 'unk'){ - mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) - return(trim(strsplit(header[mat],':')[[1]][2])) - } - GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ - header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines - toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers - type <- getGenoCode(header, 'type') - if(type == '4-way'){ - genocodes <- NULL - } else { - genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes - } - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') - cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') - if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one - if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males - outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype - c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)])) # Genotypes - write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file - require(qtl) - if(type == '4-way'){ - cat('Loading in as 4-WAY\n') - cross = read.cross(file=out, 'csvr', genotypes=NULL, crosstype="4way") # Load the created cross file using R/qtl read.cross - }else if(type == 'f2'){ - cat('Loading in as F2\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="f2") # Load the created cross file using R/qtl read.cross - }else{ - cat('Loading in as normal\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross - } - if(type == 'riset'){ - cat('Converting to RISELF\n') - cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl - } - return(cross) - } - """ % (dataset.group.genofile, scale_units)) + the_file.seek(0) + hash_of_file = hashlib.md5(the_file.read().encode()).hexdigest() -def add_perm_strata(cross, perm_strata): - col_string = 'c("the_strata")' - perm_strata_string = "c(" - for item in perm_strata: - perm_strata_string += str(item) + "," + return hash_of_file - perm_strata_string = perm_strata_string[:-1] + ")" - cross = add_phenotype(cross, perm_strata_string, "the_strata") +def write_phenotype_file(trait_name: str, + samples: List[str], + vals: List, + dataset_ob, + cofactors: Optional[str] = None, + perm_strata_list: Optional[List] = None) -> TextIO: + """Given trait name, sample list, value list, dataset ob, and optional string + representing cofactors, return the file's full path/name - strata_ob = pull_var("perm_strata", cross, col_string) + """ + cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples) - return cross, strata_ob + pheno_file = io.StringIO() + writer = csv.writer(pheno_file, delimiter="\t", quoting=csv.QUOTE_NONE) -def sanitize_rqtl_phenotype(vals): - pheno_as_string = "c(" - for i, val in enumerate(vals): - if val == "x": - if i < (len(vals) - 1): - pheno_as_string += "NA," - else: - pheno_as_string += "NA" - else: - if i < (len(vals) - 1): - pheno_as_string += str(val) + "," - else: - pheno_as_string += str(val) - pheno_as_string += ")" - - return pheno_as_string + header_row = ["Samples", trait_name] + header_row += [cofactor for cofactor in cofactor_data] + if perm_strata_list: + header_row.append("Strata") -def sanitize_rqtl_names(vals): - pheno_as_string = "c(" - for i, val in enumerate(vals): - if val == "x": - if i < (len(vals) - 1): - pheno_as_string += "NA," - else: - pheno_as_string += "NA" + writer.writerow(header_row) + for i, sample in enumerate(samples): + this_row = [sample] + if vals[i] != "x": + this_row.append(vals[i]) else: - if i < (len(vals) - 1): - pheno_as_string += "'" + str(val) + "'," - else: - pheno_as_string += "'" + str(val) + "'" - pheno_as_string += ")" - - return pheno_as_string - -def add_phenotype(cross, pheno_as_string, col_name): - ro.globalenv["the_cross"] = cross - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = as.numeric('+ pheno_as_string +'))') - return ro.r["the_cross"] - -def add_categorical_covar(cross, covar_as_string, i): - ro.globalenv["the_cross"] = cross - logger.info("cross set"); - ro.r('covar <- as.factor(' + covar_as_string + ')') - logger.info("covar set"); - ro.r('newcovar <- model.matrix(~covar-1)') - logger.info("model.matrix finished"); - ro.r('cat("new covar columns", ncol(newcovar), "\n")') - nCol = ro.r('ncol(newcovar)') - logger.info("ncol covar done: " + str(nCol[0])); - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - logger.info("pheno pulled from cross"); - nCol = int(nCol[0]) - logger.info("nCol python int:" + str(nCol)); - col_names = [] - #logger.info("loop") - for x in range(1, (nCol+1)): - #logger.info("loop" + str(x)); - col_name = "covar_" + str(i) + "_" + str(x) - #logger.info("col_name" + col_name); - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = newcovar[,' + str(x) + '])') - col_names.append(col_name) - #logger.info("loop" + str(x) + "done"); - - logger.info("returning from add_categorical_covar"); - return ro.r["the_cross"], col_names - - -def add_names(cross, names_as_string, col_name): - ro.globalenv["the_cross"] = cross - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + ' = '+ names_as_string +')') - return ro.r["the_cross"] - -def pull_var(var_name, cross, var_string): - ro.globalenv["the_cross"] = cross - ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')') - - return ro.r[var_name] - -def add_cofactors(cross, this_dataset, covariates, samples): - ro.numpy2ri.activate() - - covariate_list = covariates.split(",") - covar_name_string = "c(" - for i, covariate in enumerate(covariate_list): - logger.info("Covariate: " + covariate); - this_covar_data = [] - covar_as_string = "c(" - trait_name = covariate.split(":")[0] - dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) - - this_dataset.group.get_samplelist() - trait_samples = this_dataset.group.samplelist - trait_sample_data = trait_ob.data - for index, sample in enumerate(samples): - if sample in trait_samples: - if sample in trait_sample_data: - sample_value = trait_sample_data[sample].value - this_covar_data.append(sample_value) - else: - this_covar_data.append("NA") - - for j, item in enumerate(this_covar_data): - if j < (len(this_covar_data) - 1): - covar_as_string += str(item) + "," - else: - covar_as_string += str(item) - - covar_as_string += ")" - - datatype = get_trait_data_type(covariate) - logger.info("Covariate: " + covariate + " is of type: " + datatype); - if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - if(z < (len(col_names) -1)): - covar_name_string += '"' + col_name + '", ' + this_row.append("NA") + for cofactor in cofactor_data: + this_row.append(cofactor_data[cofactor][i]) + if perm_strata_list: + this_row.append(perm_strata_list[i]) + writer.writerow(this_row) + + hash_of_file = get_hash_of_textio(pheno_file) + file_path = TMPDIR + hash_of_file + ".csv" + + with open(file_path, "w") as fd: + pheno_file.seek(0) + shutil.copyfileobj(pheno_file, fd) + + return file_path + + +def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: + """Given a string of cofactors, the trait being mapped's dataset ob, + and list of samples, return cofactor data as a Dict + + """ + cofactor_dict = {} + if cofactors: + dataset_ob.group.get_samplelist() + sample_list = dataset_ob.group.samplelist + for cofactor in cofactors.split(","): + cofactor_name, cofactor_dataset = cofactor.split(":") + if cofactor_dataset == dataset_ob.name: + cofactor_dict[cofactor_name] = [] + trait_ob = create_trait(dataset=dataset_ob, + name=cofactor_name) + sample_data = trait_ob.data + for index, sample in enumerate(samples): + if sample in sample_data: + sample_value = sample_data[sample].value + cofactor_dict[cofactor_name].append(sample_value) else: - covar_name_string += '"' + col_name + '"' - else: - col_name = "covar_" + str(i) - cross = add_phenotype(cross, covar_as_string, col_name) - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - covar_name_string += '"' + col_name + '"' - - covar_name_string += ")" - covars_ob = pull_var("trait_covars", cross, covar_name_string) - return cross, covars_ob - -def create_marker_covariates(control_marker, cross): - ro.globalenv["the_cross"] = cross - ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user - logger.debug(userinput_sanitized) - if len(userinput_sanitized) > 0: - covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) - ro.r('covnames <- c(' + covariate_names + ')') - else: - ro.r('covnames <- c()') - ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') - ro.r('covnames <- covnames[covInGeno]') - ro.r("cat('covnames (purged): ', covnames,'\n')") - ro.r('marker_covars <- genotypes[,covnames]') # Get the covariate matrix by using the marker name as index to the genotype file - # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc - return ro.r["marker_covars"] - -def process_pair_scan_results(result): - pair_scan_results = [] - - result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - marker['chr1'] = output[i][0] - marker['Mb'] = output[i][1] - marker['chr2'] = int(output[i][2]) - pair_scan_results.append(marker) - - return pair_scan_results - -def process_rqtl_perm_results(num_perm, results): - perm_vals = [] - for line in str(results).split("\n")[1:(num_perm+1)]: - #print("R/qtl permutation line:", line.split()) - perm_vals.append(float(line.split()[1])) - - perm_output = perm_vals - suggestive = np.percentile(np.array(perm_vals), 67) - significant = np.percentile(np.array(perm_vals), 95) - - return perm_output, suggestive, significant - -def process_rqtl_results(result, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop - qtl_results = [] - output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file - marker['chr'] = "X" - else: - marker['chr'] = output[i][0] - marker['cM'] = output[i][1] - marker['Mb'] = output[i][1] - marker['lod_score'] = output[i][2] - qtl_results.append(marker) - - return qtl_results
\ No newline at end of file + cofactor_dict[cofactor_name].append("NA") + return cofactor_dict diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 8f051c14..c5b980a7 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -1,5 +1,5 @@ from base.trait import GeneralTrait -from base import data_set #import create_dataset +from base import data_set # import create_dataset from pprint import pformat as pf @@ -13,7 +13,6 @@ import os import collections import uuid -import rpy2.robjects as ro import numpy as np import pickle as pickle @@ -43,16 +42,18 @@ from utility.external import shell from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) -class RunMapping(object): + +class RunMapping: def __init__(self, start_vars, temp_uuid): helper_functions.get_species_dataset_trait(self, start_vars) - self.temp_uuid = temp_uuid #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py) + self.temp_uuid = temp_uuid - #ZS: Needed to zoom in or remap temp traits like PCA traits + # ZS: Needed to zoom in or remap temp traits like PCA traits if "temp_trait" in start_vars and start_vars['temp_trait'] != "False": self.temp_trait = "True" self.group = self.dataset.group.name @@ -60,13 +61,14 @@ class RunMapping(object): self.json_data = {} self.json_data['lodnames'] = ['lod.hk'] - #ZS: Sometimes a group may have a genofile that only includes a subset of samples + # ZS: Sometimes a group may have a genofile that only includes a subset of samples genofile_samplelist = [] if 'genofile' in start_vars: - if start_vars['genofile'] != "": - self.genofile_string = start_vars['genofile'] - self.dataset.group.genofile = self.genofile_string.split(":")[0] - genofile_samplelist = get_genofile_samplelist(self.dataset) + if start_vars['genofile'] != "": + self.genofile_string = start_vars['genofile'] + self.dataset.group.genofile = self.genofile_string.split(":")[ + 0] + genofile_samplelist = get_genofile_samplelist(self.dataset) all_samples_ordered = self.dataset.group.all_samples_ordered() @@ -93,7 +95,7 @@ class RunMapping(object): else: self.n_samples = len([val for val in self.vals if val != "x"]) - #ZS: Check if genotypes exist in the DB in order to create links for markers + # ZS: Check if genotypes exist in the DB in order to create links for markers self.geno_db_exists = geno_db_exists(self.dataset) @@ -101,8 +103,11 @@ class RunMapping(object): if "results_path" in start_vars: self.mapping_results_path = start_vars['results_path'] else: - mapping_results_filename = self.dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - self.mapping_results_path = "{}{}.csv".format(webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) + mapping_results_filename = self.dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + self.mapping_results_path = "{}{}.csv".format( + webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename) self.manhattan_plot = False if 'manhattan_plot' in start_vars: @@ -114,19 +119,20 @@ class RunMapping(object): self.manhattan_single_color = start_vars['manhattan_single_color'] self.manhattan_plot = True - self.maf = start_vars['maf'] # Minor allele frequency + self.maf = start_vars['maf'] # Minor allele frequency if "use_loco" in start_vars: self.use_loco = start_vars['use_loco'] else: self.use_loco = None self.suggestive = "" self.significant = "" - self.pair_scan = False # Initializing this since it is checked in views to determine which template to use + # Initializing this since it is checked in views to determine which template to use + self.pair_scan = False if 'transform' in start_vars: self.transform = start_vars['transform'] else: self.transform = "" - self.score_type = "LRS" #ZS: LRS or LOD + self.score_type = "LRS" # ZS: LRS or LOD self.mapping_scale = "physic" if "mapping_scale" in start_vars: self.mapping_scale = start_vars['mapping_scale'] @@ -136,10 +142,11 @@ class RunMapping(object): self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" self.categorical_vars = [] - #ZS: This is passed to GN1 code for single chr mapping + # ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 if "selected_chr" in start_vars: - if int(start_vars['selected_chr']) != -1: #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this + if int(start_vars['selected_chr']) != -1: self.selected_chr = int(start_vars['selected_chr']) + 1 else: self.selected_chr = int(start_vars['selected_chr']) @@ -153,7 +160,7 @@ class RunMapping(object): self.lrsMax = start_vars['lrsMax'] if "haplotypeAnalystCheck" in start_vars: self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck'] - if "startMb" in start_vars: #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load + if "startMb" in start_vars: # ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load if "permCheck" in start_vars: self.permCheck = "ON" else: @@ -191,34 +198,39 @@ class RunMapping(object): self.showGenes = "ON" self.viewLegend = "ON" - #self.dataset.group.get_markers() + # self.dataset.group.get_markers() if self.mapping_method == "gemma": self.first_run = True self.output_files = None if 'output_files' in start_vars: self.output_files = start_vars['output_files'] - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False self.score_type = "-logP" self.manhattan_plot = True with Bench("Running GEMMA"): if self.use_loco == "True": - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) else: - marker_obs, self.output_files = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) + marker_obs, self.output_files = gemma_mapping.run_gemma( + self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf, self.first_run, self.output_files) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": perm_strata = [] if "perm_strata" in start_vars and "categorical_vars" in start_vars: - self.categorical_vars = start_vars["categorical_vars"].split(",") + self.categorical_vars = start_vars["categorical_vars"].split( + ",") if len(self.categorical_vars) and start_vars["perm_strata"] == "True": - primary_samples = SampleList(dataset = self.dataset, - sample_names = self.samples, - this_trait = self.this_trait) + primary_samples = SampleList(dataset=self.dataset, + sample_names=self.samples, + this_trait=self.this_trait) - perm_strata = get_perm_strata(self.this_trait, primary_samples, self.categorical_vars, self.samples) + perm_strata = get_perm_strata( + self.this_trait, primary_samples, self.categorical_vars, self.samples) self.score_type = "LOD" self.control_marker = start_vars['control_marker'] self.do_control = start_vars['do_control'] @@ -227,14 +239,16 @@ class RunMapping(object): else: self.method = "em" self.model = start_vars['mapmodel_rqtl_geno'] - #if start_vars['pair_scan'] == "true": + # if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl( + self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method, + self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) elif self.mapping_method == "reaper": - if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON + if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: self.additiveCheck = start_vars['additiveCheck'] else: @@ -267,24 +281,26 @@ class RunMapping(object): if self.reaper_version == "new": self.first_run = True self.output_files = None - if 'first_run' in start_vars: #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + # ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc) + if 'first_run' in start_vars: self.first_run = False if 'output_files' in start_vars: - self.output_files = start_vars['output_files'].split(",") + self.output_files = start_vars['output_files'].split( + ",") results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(self.this_trait, - self.dataset, - self.samples, - self.vals, - self.json_data, - self.num_perm, - self.bootCheck, - self.num_bootstrap, - self.do_control, - self.control_marker, - self.manhattan_plot, - self.first_run, - self.output_files) + self.dataset, + self.samples, + self.vals, + self.json_data, + self.num_perm, + self.bootCheck, + self.num_bootstrap, + self.do_control, + self.control_marker, + self.manhattan_plot, + self.first_run, + self.output_files) else: results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(self.this_trait, self.dataset, @@ -300,182 +316,201 @@ class RunMapping(object): elif self.mapping_method == "plink": self.score_type = "-logP" self.manhattan_plot = True - results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf) + results = plink_mapping.run_plink( + self.this_trait, self.dataset, self.species, self.vals, self.maf) #results = self.run_plink() else: logger.debug("RUNNING NOTHING") self.no_results = False if len(results) == 0: - self.no_results = True + self.no_results = True else: - if self.pair_scan == True: - self.qtl_results = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": - highest_chr = marker['chr1'] - if 'lod_score' in list(marker.keys()): - self.qtl_results.append(marker) - - self.trimmed_markers = results - - for qtl in enumerate(self.qtl_results): - self.json_data['chr1'].append(str(qtl['chr1'])) - self.json_data['chr2'].append(str(qtl['chr2'])) - self.json_data['Mb'].append(qtl['Mb']) - self.json_data['markernames'].append(qtl['name']) - - self.js_data = dict( - json_data = self.json_data, - this_trait = self.this_trait.name, - data_set = self.dataset.name, - maf = self.maf, - manhattan_plot = self.manhattan_plot, - mapping_scale = self.mapping_scale, - qtl_results = self.qtl_results - ) - - else: - self.qtl_results = [] - self.results_for_browser = [] - self.annotations_for_browser = [] - highest_chr = 1 #This is needed in order to convert the highest chr to X/Y - for marker in results: - if 'Mb' in marker: - this_ps = marker['Mb']*1000000 - else: - this_ps = marker['cM']*1000000 - - browser_marker = dict( - chr = str(marker['chr']), - rs = marker['name'], - ps = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - - if self.geno_db_exists == "True": - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps, - url = "/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno" - ) - else: - annot_marker = dict( - name = str(marker['name']), - chr = str(marker['chr']), - rs = marker['name'], - pos = this_ps - ) - - if 'lrs_value' in marker and marker['lrs_value'] > 0: - browser_marker['p_wald'] = 10**-(marker['lrs_value']/4.61) - elif 'lod_score' in marker and marker['lod_score'] > 0: - browser_marker['p_wald'] = 10**-(marker['lod_score']) - else: - browser_marker['p_wald'] = 0 - - self.results_for_browser.append(browser_marker) - self.annotations_for_browser.append(annot_marker) - if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": - highest_chr = marker['chr'] - if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): - if 'Mb' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.6f}".format(marker['Mb']) - elif 'cM' in marker.keys(): - marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.3f}".format(marker['cM']) - else: - marker['display_pos'] = "N/A" - self.qtl_results.append(marker) - - total_markers = len(self.qtl_results) - - with Bench("Exporting Results"): - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) - - with Bench("Trimming Markers for Figure"): - if len(self.qtl_results) > 30000: - self.qtl_results = trim_markers_for_figure(self.qtl_results) - self.results_for_browser = trim_markers_for_figure(self.results_for_browser) - filtered_annotations = [] - for marker in self.results_for_browser: - for annot_marker in self.annotations_for_browser: - if annot_marker['rs'] == marker['rs']: - filtered_annotations.append(annot_marker) - break - self.annotations_for_browser = filtered_annotations - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - else: - browser_files = write_input_for_browser(self.dataset, self.results_for_browser, self.annotations_for_browser) - - with Bench("Trimming Markers for Table"): - self.trimmed_markers = trim_markers_for_table(results) - - chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) - - #ZS: For zooming into genome browser, need to pass chromosome name instead of number - if self.dataset.group.species == "mouse": - if self.selected_chr == 20: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - elif self.dataset.group.species == "rat": - if self.selected_chr == 21: - this_chr = "X" - else: - this_chr = str(self.selected_chr) - else: - if self.selected_chr == 22: - this_chr = "X" - elif self.selected_chr == 23: - this_chr = "Y" - else: - this_chr = str(self.selected_chr) - - if self.mapping_method != "gemma": - if self.score_type == "LRS": - significant_for_browser = self.significant / 4.61 - else: - significant_for_browser = self.significant - - self.js_data = dict( - #result_score_type = self.score_type, - #this_trait = self.this_trait.name, - #data_set = self.dataset.name, - #maf = self.maf, - #manhattan_plot = self.manhattan_plot, - #mapping_scale = self.mapping_scale, - #chromosomes = chromosome_mb_lengths, - #qtl_results = self.qtl_results, - categorical_vars = self.categorical_vars, - chr_lengths = chr_lengths, - num_perm = self.num_perm, - perm_results = self.perm_output, - significant = significant_for_browser, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers - ) - else: + if self.pair_scan == True: + self.qtl_results = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y": + highest_chr = marker['chr1'] + if 'lod_score' in list(marker.keys()): + self.qtl_results.append(marker) + + self.trimmed_markers = results + + for qtl in enumerate(self.qtl_results): + self.json_data['chr1'].append(str(qtl['chr1'])) + self.json_data['chr2'].append(str(qtl['chr2'])) + self.json_data['Mb'].append(qtl['Mb']) + self.json_data['markernames'].append(qtl['name']) + self.js_data = dict( - chr_lengths = chr_lengths, - browser_files = browser_files, - selected_chr = this_chr, - total_markers = total_markers + json_data=self.json_data, + this_trait=self.this_trait.name, + data_set=self.dataset.name, + maf=self.maf, + manhattan_plot=self.manhattan_plot, + mapping_scale=self.mapping_scale, + qtl_results=self.qtl_results ) + else: + self.qtl_results = [] + self.results_for_browser = [] + self.annotations_for_browser = [] + highest_chr = 1 # This is needed in order to convert the highest chr to X/Y + for marker in results: + if 'Mb' in marker: + this_ps = marker['Mb'] * 1000000 + else: + this_ps = marker['cM'] * 1000000 + + browser_marker = dict( + chr=str(marker['chr']), + rs=marker['name'], + ps=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + + if self.geno_db_exists == "True": + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps, + url="/show_trait?trait_id=" + \ + marker['name'] + "&dataset=" + \ + self.dataset.group.name + "Geno" + ) + else: + annot_marker = dict( + name=str(marker['name']), + chr=str(marker['chr']), + rs=marker['name'], + pos=this_ps + ) + + if 'lrs_value' in marker and marker['lrs_value'] > 0: + browser_marker['p_wald'] = 10**- \ + (marker['lrs_value'] / 4.61) + elif 'lod_score' in marker and marker['lod_score'] > 0: + browser_marker['p_wald'] = 10**-(marker['lod_score']) + else: + browser_marker['p_wald'] = 0 + + self.results_for_browser.append(browser_marker) + self.annotations_for_browser.append(annot_marker) + if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y": + highest_chr = marker['chr'] + if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): + if 'Mb' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.6f}".format(marker['Mb']) + elif 'cM' in marker.keys(): + marker['display_pos'] = "Chr" + \ + str(marker['chr']) + ": " + \ + "{:.3f}".format(marker['cM']) + else: + marker['display_pos'] = "N/A" + self.qtl_results.append(marker) + + total_markers = len(self.qtl_results) + + with Bench("Exporting Results"): + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, + self.mapping_scale, self.score_type, self.transform, self.covariates, self.n_samples) + + with Bench("Trimming Markers for Figure"): + if len(self.qtl_results) > 30000: + self.qtl_results = trim_markers_for_figure( + self.qtl_results) + self.results_for_browser = trim_markers_for_figure( + self.results_for_browser) + filtered_annotations = [] + for marker in self.results_for_browser: + for annot_marker in self.annotations_for_browser: + if annot_marker['rs'] == marker['rs']: + filtered_annotations.append(annot_marker) + break + self.annotations_for_browser = filtered_annotations + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + else: + browser_files = write_input_for_browser( + self.dataset, self.results_for_browser, self.annotations_for_browser) + + with Bench("Trimming Markers for Table"): + self.trimmed_markers = trim_markers_for_table(results) + + chr_lengths = get_chr_lengths( + self.mapping_scale, self.mapping_method, self.dataset, self.qtl_results) + + # ZS: For zooming into genome browser, need to pass chromosome name instead of number + if self.dataset.group.species == "mouse": + if self.selected_chr == 20: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + elif self.dataset.group.species == "rat": + if self.selected_chr == 21: + this_chr = "X" + else: + this_chr = str(self.selected_chr) + else: + if self.selected_chr == 22: + this_chr = "X" + elif self.selected_chr == 23: + this_chr = "Y" + else: + this_chr = str(self.selected_chr) + + if self.mapping_method != "gemma": + if self.score_type == "LRS": + significant_for_browser = self.significant / 4.61 + else: + significant_for_browser = self.significant + + self.js_data = dict( + #result_score_type = self.score_type, + #this_trait = self.this_trait.name, + #data_set = self.dataset.name, + #maf = self.maf, + #manhattan_plot = self.manhattan_plot, + #mapping_scale = self.mapping_scale, + #chromosomes = chromosome_mb_lengths, + #qtl_results = self.qtl_results, + categorical_vars=self.categorical_vars, + chr_lengths=chr_lengths, + num_perm=self.num_perm, + perm_results=self.perm_output, + significant=significant_for_browser, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + else: + self.js_data = dict( + chr_lengths=chr_lengths, + browser_files=browser_files, + selected_chr=this_chr, + total_markers=total_markers + ) + def run_rqtl_plink(self): # os.chdir("") never do this inside a webserver!! - output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) + output_filename = webqtlUtil.genRandStr("%s_%s_" % ( + self.dataset.group.name, self.this_trait.name)) - plink_mapping.gen_pheno_txt_file_plink(self.this_trait, self.dataset, self.vals, pheno_filename = output_filename) + plink_mapping.gen_pheno_txt_file_plink( + self.this_trait, self.dataset, self.vals, pheno_filename=output_filename) - rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % ( + self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) os.system(rqtl_command) @@ -504,10 +539,13 @@ class RunMapping(object): trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data + def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type, transform, covariates, n_samples): with open(results_path, "w+") as output_file: - output_file.write("Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") - output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") + output_file.write( + "Time/Date: " + datetime.datetime.now().strftime("%x / %X") + "\n") + output_file.write( + "Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n") output_file.write("Data Set: " + dataset.fullname + "\n") output_file.write("N Samples: " + str(n_samples) + "\n") if len(transform) > 0: @@ -527,7 +565,8 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, output_file.write(transform_text + "\n") if dataset.type == "ProbeSet": output_file.write("Gene Symbol: " + trait.symbol + "\n") - output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n") + output_file.write("Location: " + str(trait.chr) + \ + " @ " + str(trait.mb) + " Mb\n") if len(covariates) > 0: output_file.write("Cofactors (dataset - trait):\n") for covariate in covariates.split(","): @@ -564,6 +603,7 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, if i < (len(markers) - 1): output_file.write("\n") + def trim_markers_for_figure(markers): if 'p_wald' in list(markers[0].keys()): score_type = 'p_wald' @@ -612,11 +652,11 @@ def trim_markers_for_figure(markers): if low_counter % 20 == 0: filtered_markers.append(marker) low_counter += 1 - elif 4.61 <= marker[score_type] < (2*4.61): + elif 4.61 <= marker[score_type] < (2 * 4.61): if med_counter % 10 == 0: filtered_markers.append(marker) med_counter += 1 - elif (2*4.61) <= marker[score_type] <= (3*4.61): + elif (2 * 4.61) <= marker[score_type] <= (3 * 4.61): if high_counter % 2 == 0: filtered_markers.append(marker) high_counter += 1 @@ -624,21 +664,27 @@ def trim_markers_for_figure(markers): filtered_markers.append(marker) return filtered_markers + def trim_markers_for_table(markers): if 'lod_score' in list(markers[0].keys()): - sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lod_score'], reverse=True) else: - sorted_markers = sorted(markers, key=lambda k: k['lrs_value'], reverse=True) + sorted_markers = sorted( + markers, key=lambda k: k['lrs_value'], reverse=True) - #ZS: So we end up with a list of just 2000 markers + # ZS: So we end up with a list of just 2000 markers if len(sorted_markers) >= 2000: trimmed_sorted_markers = sorted_markers[:2000] return trimmed_sorted_markers else: return sorted_markers + def write_input_for_browser(this_dataset, gwas_results, annotations): - file_base = this_dataset.group.name + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + file_base = this_dataset.group.name + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) gwas_filename = file_base + "_GWAS" annot_filename = file_base + "_ANNOT" gwas_path = "{}/gn2/".format(TEMPDIR) + gwas_filename @@ -650,14 +696,17 @@ def write_input_for_browser(this_dataset, gwas_results, annotations): return [gwas_filename, annot_filename] + def geno_db_exists(this_dataset): geno_db_name = this_dataset.group.name + "Geno" try: - geno_db = data_set.create_dataset(dataset_name=geno_db_name, get_samplelist=False) + geno_db = data_set.create_dataset( + dataset_name=geno_db_name, get_samplelist=False) return "True" except: return "False" + def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): chr_lengths = [] if mapping_scale == "physic": @@ -682,9 +731,11 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): highest_pos = float(result['cM']) * 1000000 else: highest_pos = float(result['Mb']) * 1000000 - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) else: - chr_lengths.append({ "chr": str(this_chr), "size": str(highest_pos)}) + chr_lengths.append( + {"chr": str(this_chr), "size": str(highest_pos)}) this_chr = chr_as_num else: if mapping_method == "reaper": @@ -696,6 +747,7 @@ def get_chr_lengths(mapping_scale, mapping_method, dataset, qtl_results): return chr_lengths + def get_genofile_samplelist(dataset): genofile_samplelist = [] @@ -706,14 +758,16 @@ def get_genofile_samplelist(dataset): return genofile_samplelist + def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings = [] for sample in used_samples: if sample in list(sample_list.sample_attribute_values.keys()): combined_string = "" for var in categorical_vars: - if var in list(sample_list.sample_attribute_values[sample].keys()): - combined_string += str(sample_list.sample_attribute_values[sample][var]) + if var.lower() in sample_list.sample_attribute_values[sample]: + combined_string += str( + sample_list.sample_attribute_values[sample][var.lower()]) else: combined_string += "NA" else: @@ -721,7 +775,8 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): perm_strata_strings.append(combined_string) - d = dict([(y, x+1) for x, y in enumerate(sorted(set(perm_strata_strings)))]) + d = dict([(y, x + 1) + for x, y in enumerate(sorted(set(perm_strata_strings)))]) list_to_numbers = [d[x] for x in perm_strata_strings] perm_strata = list_to_numbers diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py deleted file mode 100644 index 772f74e4..00000000 --- a/wqflask/wqflask/model.py +++ /dev/null @@ -1,169 +0,0 @@ -import uuid -import datetime - -import simplejson as json - -from flask import request - -from wqflask import app - -import sqlalchemy -from sqlalchemy import (Column, ForeignKey, Unicode, Boolean, DateTime, - Text, Index) -from sqlalchemy.orm import relationship - -from wqflask.database import Base, init_db - -class User(Base): - __tablename__ = "user" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) - email_address = Column(Unicode(50), unique=True, nullable=False) - - # Todo: Turn on strict mode for Mysql - password = Column(Text, nullable=False) - - full_name = Column(Unicode(50)) - organization = Column(Unicode(50)) - - active = Column(Boolean(), nullable=False, default=True) - - registration_info = Column(Text) # json detailing when they were registered, etc. - - confirmed = Column(Text) # json detailing when they confirmed, etc. - - superuser = Column(Text) # json detailing when they became a superuser, otherwise empty - # if not superuser - - logins = relationship("Login", - order_by="desc(Login.timestamp)", - lazy='dynamic', # Necessary for filter in login_count - foreign_keys="Login.user", - ) - - user_collections = relationship("UserCollection", - order_by="asc(UserCollection.name)", - lazy='dynamic', - ) - - def display_num_collections(self): - """ - Returns the number of collections or a blank string if there are zero. - - - Because this is so unimportant...we wrap the whole thing in a try/expect...last thing we - want is a webpage not to be displayed because of an error here - - Importand TODO: use redis to cache this, don't want to be constantly computing it - - """ - try: - num = len(list(self.user_collections)) - return display_collapsible(num) - except Exception as why: - print("Couldn't display_num_collections:", why) - return "" - - - def get_collection_by_name(self, collection_name): - try: - collect = self.user_collections.filter_by(name=collection_name).first() - except sqlalchemy.orm.exc.NoResultFound: - collect = None - return collect - - @property - def name_and_org(self): - """Nice shortcut for printing out who the user is""" - if self.organization: - return "{} from {}".format(self.full_name, self.organization) - else: - return self.full_name - - @property - def login_count(self): - return self.logins.filter_by(successful=True).count() - - - @property - def confirmed_at(self): - if self.confirmed: - confirmed_info = json.loads(self.confirmed) - return confirmed_info['timestamp'] - else: - return None - - @property - def superuser_info(self): - if self.superuser: - return json.loads(self.superuser) - else: - return None - - @property - def crowner(self): - """If made superuser, returns object of person who did the crowning""" - if self.superuser: - superuser_info = json.loads(self.superuser) - crowner = User.query.get(superuser_info['crowned_by']) - return crowner - else: - return None - - @property - def most_recent_login(self): - try: - return self.logins[0] - except IndexError: - return None - -class Login(Base): - __tablename__ = "login" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) - user = Column(Unicode(36), ForeignKey('user.id')) - timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - ip_address = Column(Unicode(39)) - successful = Column(Boolean(), nullable=False) # False if wrong password was entered - session_id = Column(Text) # Set only if successfully logged in, otherwise should be blank - - # Set to user who assumes identity if this was a login for debugging purposes by a superuser - assumed_by = Column(Unicode(36), ForeignKey('user.id')) - - def __init__(self, user): - self.user = user.id - self.ip_address = request.remote_addr - -################################################################################################## - -class UserCollection(Base): - __tablename__ = "user_collection" - id = Column(Unicode(36), primary_key=True, default=lambda: str(uuid.uuid4())) - user = Column(Unicode(36), ForeignKey('user.id')) - - # I'd prefer this to not have a length, but for the index below it needs one - name = Column(Unicode(50)) - created_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - changed_timestamp = Column(DateTime(), default=lambda: datetime.datetime.utcnow()) - members = Column(Text) # We're going to store them as a json list - - # This index ensures a user doesn't have more than one collection with the same name - __table_args__ = (Index('usercollection_index', "user", "name"), ) - - @property - def num_members(self): - try: - return len(json.loads(self.members)) - except: - return 0 - - def members_as_set(self): - return set(json.loads(self.members)) - -def display_collapsible(number): - if number: - return number - else: - return "" - -def user_uuid(): - """Unique cookie for a user""" - user_uuid = request.cookies.get('user_uuid') diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index 1d5316a2..9b70f03d 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -28,7 +28,7 @@ from utility import corr_result_helpers from utility.tools import GN2_BRANCH_URL -class NetworkGraph(object): +class NetworkGraph: def __init__(self, start_vars): trait_db_list = [trait.strip() @@ -69,7 +69,7 @@ class NetworkGraph(object): this_trait = trait_db[0] this_db = trait_db[1] - this_db_samples = this_db.group.samplelist + this_db_samples = this_db.group.all_samples_ordered() this_sample_data = this_trait.data corr_result_row = [] @@ -80,7 +80,11 @@ class NetworkGraph(object): for target in self.trait_list: target_trait = target[0] target_db = target[1] - target_samples = target_db.group.samplelist + + if str(this_trait) == str(target_trait) and str(this_db) == str(target_db): + continue + + target_samples = target_db.group.all_samples_ordered() target_sample_data = target_trait.data @@ -137,7 +141,7 @@ class NetworkGraph(object): if abs(sample_r) > max_corr: max_corr = abs(sample_r) - edge_data = {'id': str(this_trait.name) + '_to_' + str(target_trait.name), + edge_data = {'id': f"{str(this_trait.name)}:{str(this_trait.dataset.name)}" + '_to_' + f"{str(target_trait.name)}:{str(target_trait.dataset.name)}", 'source': str(this_trait.name) + ":" + str(this_trait.dataset.name), 'target': str(target_trait.name) + ":" + str(target_trait.dataset.name), 'correlation': round(sample_r, 3), @@ -182,15 +186,3 @@ class NetworkGraph(object): samples=self.all_sample_list, sample_data=self.sample_data, elements=self.elements,) - - def get_trait_db_obs(self, trait_db_list): - self.trait_list = [] - for i, trait_db in enumerate(trait_db_list): - if i == (len(trait_db_list) - 1): - break - trait_name, dataset_name = trait_db.split(":") - dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) diff --git a/wqflask/wqflask/news.py b/wqflask/wqflask/news.py index 0675ec4b..e262dd51 100644 --- a/wqflask/wqflask/news.py +++ b/wqflask/wqflask/news.py @@ -1,6 +1,7 @@ from flask import g -class News(object): + +class News: def __init__(self): sql = """ diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py index dcd328c9..bd1c4407 100644 --- a/wqflask/wqflask/parser.py +++ b/wqflask/wqflask/parser.py @@ -22,7 +22,8 @@ import re from pprint import pformat as pf from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) + def parse(pstring): """ @@ -33,7 +34,7 @@ def parse(pstring): (\w+\s*[=:\>\<][\w\*]+) | # wiki=bar, GO:foobar, etc (".*?") | ('.*?') | # terms in quotes, i.e. "brain weight" ([\w\*\?]+)) # shh, brain, etc """, pstring, - flags=re.VERBOSE) + flags=re.VERBOSE) pstring = [item.strip() for item in pstring if item and item.strip()] @@ -52,7 +53,7 @@ def parse(pstring): if '(' in value or '[' in value: assert value.startswith(("(", "[")), "Invalid token" assert value.endswith((")", "]")), "Invalid token" - value = value[1:-1] # Get rid of the parenthesis + value = value[1:-1] # Get rid of the parenthesis values = re.split(r"""\s+|,""", value) value = [value.strip() for value in values if value.strip()] else: diff --git a/wqflask/wqflask/pbkdf2.py b/wqflask/wqflask/pbkdf2.py index 6346df03..1a965fc5 100644 --- a/wqflask/wqflask/pbkdf2.py +++ b/wqflask/wqflask/pbkdf2.py @@ -4,6 +4,8 @@ from werkzeug.security import safe_str_cmp as ssc # Replace this because it just wraps around Python3's internal # functions. Added this during migration. + + def pbkdf2_hex(data, salt, iterations=1000, keylen=24, hashfunc="sha1"): """Wrapper function of python's hashlib.pbkdf2_hmac. """ diff --git a/wqflask/wqflask/resource_manager.py b/wqflask/wqflask/resource_manager.py index 7d51a83d..b28c1b04 100644 --- a/wqflask/wqflask/resource_manager.py +++ b/wqflask/wqflask/resource_manager.py @@ -11,6 +11,7 @@ from utility.redis_tools import get_resource_info, get_group_info, get_groups_li from utility.logger import getLogger logger = getLogger(__name__) + @app.route("/resources/manage", methods=('GET', 'POST')) def manage_resource(): params = request.form if request.form else request.args @@ -26,7 +27,7 @@ def manage_resource(): owner_display_name = None if owner_id != "none": - try: #ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being + try: # ZS: User IDs are sometimes stored in Redis as bytes and sometimes as strings, so this is just to avoid any errors for the time being owner_id = str.encode(owner_id) except: pass @@ -38,17 +39,20 @@ def manage_resource(): elif 'email_address' in owner_info: owner_display_name = owner_info['email_address'] - return render_template("admin/manage_resource.html", owner_name = owner_display_name, resource_id = resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + return render_template("admin/manage_resource.html", owner_name=owner_display_name, resource_id=resource_id, resource_info=resource_info, default_mask=default_mask, group_masks=group_masks_with_names, admin_status=admin_status) + @app.route("/search_for_users", methods=('POST',)) def search_for_user(): params = request.form user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) return json.dumps(user_list) + @app.route("/search_for_groups", methods=('POST',)) def search_for_groups(): params = request.form @@ -58,13 +62,15 @@ def search_for_groups(): user_list = [] user_list += get_users_like_unique_column("full_name", params['user_name']) - user_list += get_users_like_unique_column("email_address", params['user_email']) + user_list += get_users_like_unique_column( + "email_address", params['user_email']) for user in user_list: group_list += get_groups_like_unique_column("admins", user['user_id']) group_list += get_groups_like_unique_column("members", user['user_id']) return json.dumps(group_list) + @app.route("/resources/change_owner", methods=('POST',)) def change_owner(): resource_id = request.form['resource_id'] @@ -79,7 +85,8 @@ def change_owner(): flash("You lack the permissions to make this change.", "error") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/change_resource_owner.html", resource_id = resource_id) + return render_template("admin/change_resource_owner.html", resource_id=resource_id) + @app.route("/resources/change_default_privileges", methods=('POST',)) def change_default_privileges(): @@ -99,6 +106,7 @@ def change_default_privileges(): else: return redirect(url_for("no_access_page")) + @app.route("/resources/add_group", methods=('POST',)) def add_group_to_resource(): resource_id = request.form['resource_id'] @@ -108,7 +116,7 @@ def add_group_to_resource(): group_id = request.form['selected_group'] resource_info = get_resource_info(resource_id) default_privileges = resource_info['default_mask'] - return render_template("admin/set_group_privileges.html", resource_id = resource_id, group_id = group_id, default_privileges = default_privileges) + return render_template("admin/set_group_privileges.html", resource_id=resource_id, group_id=group_id, default_privileges=default_privileges) elif all(key in request.form for key in ('data_privilege', 'metadata_privilege', 'admin_privilege')): group_id = request.form['group_id'] group_name = get_group_info(group_id)['name'] @@ -118,13 +126,15 @@ def add_group_to_resource(): 'admin': request.form['admin_privilege'] } add_access_mask(resource_id, group_id, access_mask) - flash("Privileges have been added for group {}.".format(group_name), "alert-info") + flash("Privileges have been added for group {}.".format( + group_name), "alert-info") return redirect(url_for("manage_resource", resource_id=resource_id)) else: - return render_template("admin/search_for_groups.html", resource_id = resource_id) + return render_template("admin/search_for_groups.html", resource_id=resource_id) else: return redirect(url_for("no_access_page")) + def get_group_names(group_masks): group_masks_with_names = {} for group_id, group_mask in list(group_masks.items()): @@ -132,5 +142,5 @@ def get_group_names(group_masks): group_name = get_group_info(group_id)['name'] this_mask['name'] = group_name group_masks_with_names[group_id] = this_mask - + return group_masks_with_names diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index f23c0582..3cbda3dd 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -22,9 +22,10 @@ from utility.tools import GN2_BASE_URL from utility.type_checking import is_str from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) -class SearchResultPage(object): + +class SearchResultPage: #maxReturn = 3000 def __init__(self, kw): @@ -39,7 +40,7 @@ class SearchResultPage(object): self.uc_id = uuid.uuid4() self.go_term = None - logger.debug("uc_id:", self.uc_id) # contains a unique id + logger.debug("uc_id:", self.uc_id) # contains a unique id logger.debug("kw is:", kw) # dict containing search terms if kw['search_terms_or']: @@ -51,7 +52,8 @@ class SearchResultPage(object): search = self.search_terms self.original_search_string = self.search_terms # check for dodgy search terms - rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) + rx = re.compile( + r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) if rx.match(search): logger.info("Regex failed search") self.search_term_exists = False @@ -72,11 +74,11 @@ class SearchResultPage(object): self.dataset = create_dataset(kw['dataset'], dataset_type) logger.debug("search_terms:", self.search_terms) - #ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here + # ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here try: self.search() except: - self.search_term_exists = False + self.search_term_exists = False self.too_many_results = False if self.search_term_exists: @@ -95,7 +97,8 @@ class SearchResultPage(object): trait_list = [] json_trait_list = [] - species = webqtlDatabaseFunction.retrieve_species(self.dataset.group.name) + species = webqtlDatabaseFunction.retrieve_species( + self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term logger.debug("self.results is:", pf(self.results)) @@ -108,7 +111,8 @@ class SearchResultPage(object): trait_dict = {} trait_id = result[0] - this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = create_trait( + dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) if this_trait: trait_dict['index'] = index + 1 trait_dict['name'] = this_trait.name @@ -117,9 +121,10 @@ class SearchResultPage(object): else: trait_dict['display_name'] = this_trait.name trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) + trait_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(this_trait.name, this_trait.dataset.name)) if this_trait.dataset.type == "ProbeSet": - trait_dict['symbol'] = this_trait.symbol + trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A" trait_dict['description'] = "N/A" if this_trait.description_display: trait_dict['description'] = this_trait.description_display @@ -167,9 +172,11 @@ class SearchResultPage(object): self.trait_list = trait_list if self.dataset.type == "ProbeSet": - self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'symbol', 'description', + 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Publish": - self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] + self.header_data_names = ['index', 'display_name', 'description', 'mean', + 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] elif self.dataset.type == "Geno": self.header_data_names = ['index', 'display_name', 'location'] @@ -183,7 +190,8 @@ class SearchResultPage(object): combined_from_clause = "" combined_where_clause = "" - previous_from_clauses = [] #The same table can't be referenced twice in the from clause + # The same table can't be referenced twice in the from clause + previous_from_clauses = [] logger.debug("len(search_terms)>1") symbol_list = [] @@ -197,7 +205,8 @@ class SearchResultPage(object): for i, a_search in enumerate(alias_terms): the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -221,7 +230,8 @@ class SearchResultPage(object): else: the_search = self.get_search_ob(a_search) if the_search != None: - get_from_clause = getattr(the_search, "get_from_clause", None) + get_from_clause = getattr( + the_search, "get_from_clause", None) if callable(get_from_clause): from_clause = the_search.get_from_clause() if from_clause in previous_from_clauses: @@ -231,7 +241,7 @@ class SearchResultPage(object): combined_from_clause += from_clause where_clause = the_search.get_where_clause() combined_where_clause += "(" + where_clause + ")" - if (i+1) < len(self.search_terms): + if (i + 1) < len(self.search_terms): if self.and_or == "and": combined_where_clause += "AND" else: @@ -240,7 +250,8 @@ class SearchResultPage(object): self.search_term_exists = False if self.search_term_exists: combined_where_clause = "(" + combined_where_clause + ")" - final_query = the_search.compile_final_query(combined_from_clause, combined_where_clause) + final_query = the_search.compile_final_query( + combined_from_clause, combined_where_clause) results = the_search.execute(final_query) self.results.extend(results) @@ -262,14 +273,15 @@ class SearchResultPage(object): if search_ob: search_class = getattr(do_search, search_ob) the_search = search_class(search_term, - search_operator, - self.dataset, - search_type['key'] - ) + search_operator, + self.dataset, + search_type['key'] + ) return the_search else: return None + def get_GO_symbols(a_search): query = """SELECT genes FROM GORef @@ -287,13 +299,15 @@ def get_GO_symbols(a_search): return new_terms + def insert_newlines(string, every=64): """ This is because it is seemingly impossible to change the width of the description column, so I'm just manually adding line breaks """ lines = [] for i in range(0, len(string), every): - lines.append(string[i:i+every]) + lines.append(string[i:i + every]) return '\n'.join(lines) + def get_aliases(symbol_list, species): updated_symbols = [] @@ -308,7 +322,8 @@ def get_aliases(symbol_list, species): symbols_string = ",".join(updated_symbols) filtered_aliases = [] - response = requests.get(GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) + response = requests.get( + GN2_BASE_URL + "/gn3/gene/aliases2/" + symbols_string) if response: alias_lists = json.loads(response.content) seen = set() @@ -322,10 +337,9 @@ def get_aliases(symbol_list, species): search_terms = [] for alias in filtered_aliases: - the_search_term = {'key': None, + the_search_term = {'key': None, 'search_term': [alias], - 'separator' : None} + 'separator': None} search_terms.append(the_search_term) return search_terms - diff --git a/wqflask/wqflask/send_mail.py b/wqflask/wqflask/send_mail.py index 86e8a558..299c866a 100644 --- a/wqflask/wqflask/send_mail.py +++ b/wqflask/wqflask/send_mail.py @@ -8,10 +8,12 @@ Redis = StrictRedis() import mailer + def timestamp(): ts = datetime.datetime.utcnow() return ts.isoformat() + def main(): while True: print("I'm alive!") @@ -31,7 +33,6 @@ def main(): process_message(msg) - def process_message(msg): msg = json.loads(msg) diff --git a/wqflask/wqflask/server_side.py b/wqflask/wqflask/server_side.py index 5f764767..e661c407 100644 --- a/wqflask/wqflask/server_side.py +++ b/wqflask/wqflask/server_side.py @@ -1,8 +1,7 @@ # handles server side table processing - -class ServerSideTable(object): +class ServerSideTable: """ This class is used to do server-side processing on the DataTables table such as paginating, sorting, @@ -10,13 +9,11 @@ class ServerSideTable(object): the client-side and reduces the size of data interchanged. Usage: - ServerSideTable(table_data, request_values) + ServerSideTable(rows_count, table_rows, header_data_names, request_values) where, - `table_data` must have data members `rows_count` as number of rows in the table, `table_rows` as data rows of the table, `header_data_names` as headers names of the table. - `request_values` must have request arguments values including the DataTables server-side processing arguments. @@ -31,7 +28,7 @@ class ServerSideTable(object): self.rows_count = rows_count self.table_rows = table_rows self.header_data_names = header_data_names - + self.sort_rows() self.paginate_rows() @@ -50,8 +47,8 @@ class ServerSideTable(object): column_name = self.header_data_names[column_number - 1] sort_direction = self.request_values['sSortDir_' + str(i)] self.table_rows = sorted(self.table_rows, - key=lambda x: x[column_name], - reverse=is_reverse(sort_direction)) + key=lambda x: x[column_name], + reverse=is_reverse(sort_direction)) def paginate_rows(self): """ diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index 857e4456..92cea550 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -8,7 +8,7 @@ from pprint import pformat as pf from utility import Plot from utility import Bunch -class SampleList(object): +class SampleList: def __init__(self, dataset, sample_names, @@ -36,16 +36,16 @@ class SampleList(object): if isinstance(self.this_trait, list): sample = webqtlCaseData.webqtlCaseData(name=sample_name) if counter <= len(self.this_trait): - if isinstance(self.this_trait[counter-1], (bytes, bytearray)): - if (self.this_trait[counter-1].decode("utf-8").lower() != 'x'): + if isinstance(self.this_trait[counter - 1], (bytes, bytearray)): + if (self.this_trait[counter - 1].decode("utf-8").lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: - if (self.this_trait[counter-1].lower() != 'x'): + if (self.this_trait[counter - 1].lower() != 'x'): sample = webqtlCaseData.webqtlCaseData( name=sample_name, - value=float(self.this_trait[counter-1])) + value=float(self.this_trait[counter - 1])) else: # ZS - If there's no value for the sample/strain, # create the sample object (so samples with no value @@ -56,8 +56,8 @@ class SampleList(object): sample = webqtlCaseData.webqtlCaseData(name=sample_name) sample.extra_info = {} - if (self.dataset.group.name == 'AXBXA' and - sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): + if (self.dataset.group.name == 'AXBXA' + and sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')): sample.extra_info['url'] = "/mouseCross.html#AXB/BXA" sample.extra_info['css_class'] = "fs12" @@ -69,24 +69,32 @@ class SampleList(object): sample.extra_attributes = self.sample_attribute_values.get( sample_name, {}) - #ZS: Add a url so RRID case attributes can be displayed as links + # ZS: Add a url so RRID case attributes can be displayed as links if 'rrid' in sample.extra_attributes: if self.dataset.group.species == "mouse": if len(sample.extra_attributes['rrid'].split(":")) > 1: - the_rrid = sample.extra_attributes['rrid'].split(":")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_MOUSE_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split(":")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_MOUSE_URL % the_rrid) elif self.dataset.group.species == "rat": if len(str(sample.extra_attributes['rrid'])): - the_rrid = sample.extra_attributes['rrid'].split("_")[1] - sample.extra_attributes['rrid'] = [sample.extra_attributes['rrid']] - sample.extra_attributes['rrid'].append(webqtlConfig.RRID_RAT_URL % the_rrid) + the_rrid = sample.extra_attributes['rrid'].split("_")[ + 1] + sample.extra_attributes['rrid'] = [ + sample.extra_attributes['rrid']] + sample.extra_attributes['rrid'].append( + webqtlConfig.RRID_RAT_URL % the_rrid) self.sample_list.append(sample) self.se_exists = any(sample.variance for sample in self.sample_list) - self.num_cases_exists = any( - sample.num_cases for sample in self.sample_list) + self.num_cases_exists = False + if (any(sample.num_cases for sample in self.sample_list) and + any((sample.num_cases and sample.num_cases != "1") for sample in self.sample_list)): + self.num_cases_exists = True first_attr_col = self.get_first_attr_col() for sample in self.sample_list: @@ -129,13 +137,15 @@ class SampleList(object): self.attributes[key].name = name self.attributes[key].distinct_values = [ item.Value for item in values] - self.attributes[key].distinct_values=natural_sort(self.attributes[key].distinct_values) + self.attributes[key].distinct_values = natural_sort( + self.attributes[key].distinct_values) all_numbers = True for value in self.attributes[key].distinct_values: try: val_as_float = float(value) except: all_numbers = False + break if all_numbers: self.attributes[key].alignment = "right" @@ -169,7 +179,8 @@ class SampleList(object): except ValueError: pass - attribute_values[self.attributes[item.Id].name.lower()] = attribute_value + attribute_values[self.attributes[item.Id].name.lower( + )] = attribute_value self.sample_attribute_values[sample_name] = attribute_values def get_first_attr_col(self): diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index 379b746c..7fabc3f6 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -5,6 +5,7 @@ from functools import cmp_to_key from base.trait import create_trait from base import data_set + def export_sample_table(targs): sample_data = json.loads(targs['export_data']) @@ -28,6 +29,7 @@ def export_sample_table(targs): return trait_name, final_sample_data + def get_export_metadata(trait_id, dataset_name): dataset = data_set.create_dataset(dataset_name) this_trait = create_trait(dataset=dataset, @@ -38,16 +40,23 @@ def get_export_metadata(trait_id, dataset_name): metadata = [] if dataset.type == "Publish": metadata.append(["Phenotype ID: " + trait_id]) - metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) metadata.append(["Group: " + dataset.group.name]) - metadata.append(["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) - metadata.append(["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) - metadata.append(["Title: " + (this_trait.title if this_trait.title else "N/A")]) - metadata.append(["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) - metadata.append(["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) + metadata.append( + ["Phenotype: " + this_trait.description_display.replace(",", "\",\"")]) + metadata.append( + ["Authors: " + (this_trait.authors if this_trait.authors else "N/A")]) + metadata.append( + ["Title: " + (this_trait.title if this_trait.title else "N/A")]) + metadata.append( + ["Journal: " + (this_trait.journal if this_trait.journal else "N/A")]) + metadata.append( + ["Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + dataset.name]) else: metadata.append(["Record ID: " + trait_id]) - metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + trait_id + "&dataset=" + dataset_name]) + metadata.append(["Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" + \ + trait_id + "&dataset=" + dataset_name]) if this_trait.symbol: metadata.append(["Symbol: " + this_trait.symbol]) metadata.append(["Dataset: " + dataset.name]) @@ -64,6 +73,7 @@ def dict_to_sorted_list(dictionary): sorted_values = [item[1] for item in sorted_list] return sorted_values + def cmp_samples(a, b): if b[0] == 'name': return 1 diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 878c41c0..9ee6a16d 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -1,7 +1,5 @@ import string -import os import datetime -import pickle import uuid import requests import json as json @@ -11,75 +9,80 @@ from collections import OrderedDict import numpy as np import scipy.stats as ss -from flask import Flask, g +from flask import g from base import webqtlConfig -from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from base.trait import create_trait from base import data_set -from db import webqtlDatabaseFunction -from utility import webqtlUtil, Plot, Bunch, helper_functions +from utility import helper_functions from utility.authentication_tools import check_owner_or_admin from utility.tools import locate_ignore_error from utility.redis_tools import get_redis_conn, get_resource_id +from utility.logger import getLogger + + Redis = get_redis_conn() ONE_YEAR = 60 * 60 * 24 * 365 -from pprint import pformat as pf - -from utility.logger import getLogger -logger = getLogger(__name__ ) +logger = getLogger(__name__) ############################################### # -# Todo: Put in security to ensure that user has permission to access confidential data sets -# And add i.p.limiting as necessary +# Todo: Put in security to ensure that user has permission to access +# confidential data sets And add i.p.limiting as necessary # ############################################## -class ShowTrait(object): +class ShowTrait: def __init__(self, kw): if 'trait_id' in kw and kw['dataset'] != "Temp": self.temp_trait = False self.trait_id = kw['trait_id'] helper_functions.get_species_dataset_trait(self, kw) self.resource_id = get_resource_id(self.dataset, self.trait_id) - self.admin_status = check_owner_or_admin(resource_id=self.resource_id) + self.admin_status = check_owner_or_admin( + resource_id=self.resource_id) elif 'group' in kw: self.temp_trait = True - self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + \ + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") self.temp_species = kw['species'] self.temp_group = kw['group'] - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group) - # Put values in Redis so they can be looked up later if added to a collection + # Put values in Redis so they can be looked up later if + # added to a collection Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR) self.trait_vals = kw['trait_paste'].split() self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) - self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id) + self.admin_status = check_owner_or_admin( + dataset=self.dataset, trait_id=self.trait_id) else: self.temp_trait = True self.trait_id = kw['trait_id'] self.temp_species = self.trait_id.split("_")[1] self.temp_group = self.trait_id.split("_")[2] - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group) self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) self.trait_vals = Redis.get(self.trait_id).split() - self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id) + self.admin_status = check_owner_or_admin( + dataset=self.dataset, trait_id=self.trait_id) - #ZS: Get verify/rna-seq link URLs + # ZS: Get verify/rna-seq link URLs try: blatsequence = self.this_trait.blatseq if not blatsequence: - #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. + # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. query1 = """SELECT Probe.Sequence, Probe.Name FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND @@ -96,10 +99,10 @@ class ShowTrait(object): if int(seqt[1][-1]) % 2 == 1: blatsequence += string.strip(seqt[0]) - #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe + # --------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A' - #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. + # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead. query2 = """SELECT Probe.Sequence, Probe.Name FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND @@ -110,24 +113,29 @@ class ShowTrait(object): seqs = g.db.execute(query2).fetchall() for seqt in seqs: - if int(seqt[1][-1]) %2 == 1: - blatsequence += '%3EProbe_' + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A' + if int(seqt[1][-1]) % 2 == 1: + blatsequence += '%3EProbe_' + \ + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A' if self.dataset.group.species == "rat": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'rat', 'rn6', blatsequence) self.UTHSC_BLAT_URL = "" elif self.dataset.group.species == "mouse": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10', blatsequence) - self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'mouse', 'mm10', blatsequence) + self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ( + 'mouse', 'mm10', blatsequence) elif self.dataset.group.species == "human": - self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38', blatsequence) + self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ( + 'human', 'hg38', blatsequence) self.UTHSC_BLAT_URL = "" else: self.UCSC_BLAT_URL = "" self.UTHSC_BLAT_URL = "" except: - self.UCSC_BLAT_URL = "" - self.UTHSC_BLAT_URL = "" + self.UCSC_BLAT_URL = "" + self.UTHSC_BLAT_URL = "" if self.dataset.type == "ProbeSet": self.show_probes = "True" @@ -138,22 +146,24 @@ class ShowTrait(object): self.ncbi_summary = get_ncbi_summary(self.this_trait) - #Get nearest marker for composite mapping + # Get nearest marker for composite mapping if not self.temp_trait: if check_if_attr_exists(self.this_trait, 'locus_chr') and self.dataset.type != "Geno" and self.dataset.type != "Publish": - self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset) - #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0] - #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1] + self.nearest_marker = get_nearest_marker( + self.this_trait, self.dataset) else: self.nearest_marker = "" - #self.nearest_marker1 = "" - #self.nearest_marker2 = "" - self.make_sample_lists() - self.qnorm_vals = quantile_normalize_vals(self.sample_groups) - self.z_scores = get_z_scores(self.sample_groups) + trait_vals_by_group = [] + for sample_type in self.sample_groups: + trait_vals_by_group.append(get_trait_vals(sample_type.sample_list)) + + self.max_digits_by_group = get_max_digits(trait_vals_by_group) + + self.qnorm_vals = quantile_normalize_vals(self.sample_groups, trait_vals_by_group) + self.z_scores = get_z_scores(self.sample_groups, trait_vals_by_group) self.temp_uuid = uuid.uuid4() @@ -168,29 +178,40 @@ class ShowTrait(object): categorical_var_list = [] if not self.temp_trait: - categorical_var_list = get_categorical_variables(self.this_trait, self.sample_groups[0]) #ZS: Only using first samplelist, since I think mapping only uses those samples + # ZS: Only using first samplelist, since I think mapping only uses those samples + categorical_var_list = get_categorical_variables( + self.this_trait, self.sample_groups[0]) - #ZS: Get list of chromosomes to select for mapping + # ZS: Get list of chromosomes to select for mapping self.chr_list = [["All", -1]] for i, this_chr in enumerate(self.dataset.species.chromosomes.chromosomes): - self.chr_list.append([self.dataset.species.chromosomes.chromosomes[this_chr].name, i]) + self.chr_list.append( + [self.dataset.species.chromosomes.chromosomes[this_chr].name, i]) self.genofiles = self.dataset.group.get_genofiles() - if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: #ZS: No need to grab scales from .geno file unless it's using a mapping method that reads .geno files + # ZS: No need to grab scales from .geno file unless it's using + # a mapping method that reads .geno files + if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: if self.genofiles: self.scales_in_geno = get_genotype_scales(self.genofiles) else: - self.scales_in_geno = get_genotype_scales(self.dataset.group.name + ".geno") + self.scales_in_geno = get_genotype_scales( + self.dataset.group.name + ".geno") else: self.scales_in_geno = {} self.has_num_cases = has_num_cases(self.this_trait) - #ZS: Needed to know whether to display bar chart + get max sample name length in order to set table column width + # ZS: Needed to know whether to display bar chart + get max + # sample name length in order to set table column width self.num_values = 0 - self.binary = "true" #ZS: So it knows whether to display the Binary R/qtl mapping method, which doesn't work unless all values are 0 or 1 - self.negative_vals_exist = "false" #ZS: Since we don't want to show log2 transform option for situations where it doesn't make sense + # ZS: So it knows whether to display the Binary R/qtl mapping + # method, which doesn't work unless all values are 0 or 1 + self.binary = "true" + # ZS: Since we don't want to show log2 transform option for + # situations where it doesn't make sense + self.negative_vals_exist = "false" max_samplename_width = 1 for group in self.sample_groups: for sample in group.sample_list: @@ -203,9 +224,18 @@ class ShowTrait(object): if sample.value < 0: self.negative_vals_exist = "true" + # ZS: Check whether any attributes have few enough distinct + # values to show the "Block samples by group" option + self.categorical_attr_exists = "false" + for attribute in self.sample_groups[0].attributes: + if len(self.sample_groups[0].attributes[attribute].distinct_values) <= 10: + self.categorical_attr_exists = "true" + break + sample_column_width = max_samplename_width * 8 - self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups, sample_column_width, self.has_num_cases) + self.stats_table_width, self.trait_table_width = get_table_widths( + self.sample_groups, sample_column_width, self.has_num_cases) if self.num_values >= 5000: self.maf = 0.01 @@ -236,9 +266,12 @@ class ShowTrait(object): hddn['dataset'] = self.dataset.name hddn['temp_trait'] = False if self.temp_trait: - hddn['temp_trait'] = True - hddn['group'] = self.temp_group - hddn['species'] = self.temp_species + hddn['temp_trait'] = True + hddn['group'] = self.temp_group + hddn['species'] = self.temp_species + else: + hddn['group'] = self.dataset.group.name + hddn['species'] = self.dataset.group.species hddn['use_outliers'] = False hddn['method'] = "gemma" hddn['selected_chr'] = -1 @@ -246,12 +279,13 @@ class ShowTrait(object): hddn['suggestive'] = 0 hddn['num_perm'] = 0 hddn['categorical_vars'] = "" + if categorical_var_list: + hddn['categorical_vars'] = ",".join(categorical_var_list) hddn['manhattan_plot'] = "" hddn['control_marker'] = "" if not self.temp_trait: if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish": hddn['control_marker'] = self.nearest_marker - #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2 hddn['do_control'] = False hddn['maf'] = 0.05 hddn['mapping_scale'] = "physic" @@ -259,38 +293,47 @@ class ShowTrait(object): hddn['export_data'] = "" hddn['export_format'] = "excel" if len(self.scales_in_geno) < 2: - hddn['mapping_scale'] = self.scales_in_geno[list(self.scales_in_geno.keys())[0]][0][0] + hddn['mapping_scale'] = self.scales_in_geno[list( + self.scales_in_geno.keys())[0]][0][0] - # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self + # We'll need access to this_trait and hddn in the Jinja2 + # Template, so we put it inside self self.hddn = hddn - js_data = dict(trait_id = self.trait_id, - trait_symbol = trait_symbol, - short_description = short_description, - unit_type = trait_units, - dataset_type = self.dataset.type, - species = self.dataset.group.species, - scales_in_geno = self.scales_in_geno, - data_scale = self.dataset.data_scale, - sample_group_types = self.sample_group_types, - sample_lists = sample_lists, - se_exists = self.sample_groups[0].se_exists, - has_num_cases = self.has_num_cases, - attributes = self.sample_groups[0].attributes, - categorical_vars = ",".join(categorical_var_list), - num_values = self.num_values, - qnorm_values = self.qnorm_vals, - zscore_values = self.z_scores, - sample_column_width = sample_column_width, - temp_uuid = self.temp_uuid) + js_data = dict(trait_id=self.trait_id, + trait_symbol=trait_symbol, + max_digits = self.max_digits_by_group, + short_description=short_description, + unit_type=trait_units, + dataset_type=self.dataset.type, + species=self.dataset.group.species, + scales_in_geno=self.scales_in_geno, + data_scale=self.dataset.data_scale, + sample_group_types=self.sample_group_types, + sample_lists=sample_lists, + se_exists=self.sample_groups[0].se_exists, + has_num_cases=self.has_num_cases, + attributes=self.sample_groups[0].attributes, + categorical_attr_exists=self.categorical_attr_exists, + categorical_vars=",".join(categorical_var_list), + num_values=self.num_values, + qnorm_values=self.qnorm_vals, + zscore_values=self.z_scores, + sample_column_width=sample_column_width, + temp_uuid=self.temp_uuid) self.js_data = js_data def get_external_links(self): - #ZS: There's some weirdness here because some fields don't exist while others are empty strings - self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists(self.this_trait, 'pubmed_id') else None - self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists(self.this_trait, 'geneid') else None - self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists(self.this_trait, 'omim') else None - self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists(self.this_trait, 'homologeneid') else None + # ZS: There's some weirdness here because some fields don't + # exist while others are empty strings + self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists( + self.this_trait, 'pubmed_id') else None + self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists( + self.this_trait, 'geneid') else None + self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists( + self.this_trait, 'omim') else None + self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists( + self.this_trait, 'homologeneid') else None self.genbank_link = None if check_if_attr_exists(self.this_trait, 'genbankid'): @@ -312,14 +355,16 @@ class ShowTrait(object): self.panther_link = webqtlConfig.PANTHER_URL % self.this_trait.symbol self.ebi_gwas_link = webqtlConfig.EBIGWAS_URL % self.this_trait.symbol self.protein_atlas_link = webqtlConfig.PROTEIN_ATLAS_URL % self.this_trait.symbol - #self.open_targets_link = webqtlConfig.OPEN_TARGETS_URL % self.this_trait.symbol if self.dataset.group.species == "mouse" or self.dataset.group.species == "human": - self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize()) + self.rgd_link = webqtlConfig.RGD_URL % ( + self.this_trait.symbol, self.dataset.group.species.capitalize()) if self.dataset.group.species == "mouse": - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("mus-musculus", self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "mus-musculus", self.this_trait.symbol) else: - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("homo-sapiens", self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "homo-sapiens", self.this_trait.symbol) if self.dataset.group.species == "mouse": self.aba_link = webqtlConfig.ABA_URL % self.this_trait.symbol @@ -335,14 +380,18 @@ class ShowTrait(object): chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and self.this_trait.refseq_transcriptid: - transcript_start = int(transcript_start*1000000) - transcript_end = int(transcript_end*1000000) - self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) + self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( + 'mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end) if self.dataset.group.species == "rat": - self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize()) - self.phenogen_link = webqtlConfig.PHENOGEN_URL % (self.this_trait.symbol) - self.genemania_link = webqtlConfig.GENEMANIA_URL % ("rattus-norvegicus", self.this_trait.symbol) + self.rgd_link = webqtlConfig.RGD_URL % ( + self.this_trait.symbol, self.dataset.group.species.capitalize()) + self.phenogen_link = webqtlConfig.PHENOGEN_URL % ( + self.this_trait.symbol) + self.genemania_link = webqtlConfig.GENEMANIA_URL % ( + "rattus-norvegicus", self.this_trait.symbol) query = """SELECT kgID, chromosome, txStart, txEnd FROM GeneList_rn33 @@ -355,12 +404,15 @@ class ShowTrait(object): kgId = chr = transcript_start = transcript_end = None if chr and transcript_start and transcript_end and kgId: - transcript_start = int(transcript_start*1000000) # Convert to bases from megabases - transcript_end = int(transcript_end*1000000) - self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('rn6', kgId, chr, transcript_start, transcript_end) + # Convert to bases from megabases + transcript_start = int(transcript_start * 1000000) + transcript_end = int(transcript_end * 1000000) + self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ( + 'rn6', kgId, chr, transcript_start, transcript_end) if self.this_trait.geneid and (self.dataset.group.species == "mouse" or self.dataset.group.species == "rat" or self.dataset.group.species == "human"): - self.biogps_link = webqtlConfig.BIOGPS_URL % (self.dataset.group.species, self.this_trait.geneid) + self.biogps_link = webqtlConfig.BIOGPS_URL % ( + self.dataset.group.species, self.this_trait.geneid) self.gemma_link = webqtlConfig.GEMMA_URL % self.this_trait.geneid if self.dataset.group.species == "human": @@ -381,47 +433,52 @@ class ShowTrait(object): if self.temp_trait == True: dataset_menu = data_set.datasets(this_group) else: - dataset_menu = data_set.datasets(this_group, self.dataset.group) + dataset_menu = data_set.datasets( + this_group, self.dataset.group) dataset_menu_selected = None if len(dataset_menu): if self.dataset: dataset_menu_selected = self.dataset.name - return_results_menu = (100, 200, 500, 1000, 2000, 5000, 10000, 15000, 20000) + return_results_menu = (100, 200, 500, 1000, + 2000, 5000, 10000, 15000, 20000) return_results_menu_selected = 500 - self.corr_tools = dict(dataset_menu = dataset_menu, - dataset_menu_selected = dataset_menu_selected, - return_results_menu = return_results_menu, - return_results_menu_selected = return_results_menu_selected,) + self.corr_tools = dict(dataset_menu=dataset_menu, + dataset_menu_selected=dataset_menu_selected, + return_results_menu=return_results_menu, + return_results_menu_selected=return_results_menu_selected,) def make_sample_lists(self): all_samples_ordered = self.dataset.group.all_samples_ordered() - + parent_f1_samples = [] if self.dataset.group.parlist and self.dataset.group.f1list: parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list primary_sample_names = list(all_samples_ordered) - if not self.temp_trait: other_sample_names = [] for sample in list(self.this_trait.data.keys()): if (self.this_trait.data[sample].name2 != self.this_trait.data[sample].name): - if ((self.this_trait.data[sample].name2 in primary_sample_names) and - (self.this_trait.data[sample].name not in primary_sample_names)): - primary_sample_names.append(self.this_trait.data[sample].name) - primary_sample_names.remove(self.this_trait.data[sample].name2) + if ((self.this_trait.data[sample].name2 in primary_sample_names) + and (self.this_trait.data[sample].name not in primary_sample_names)): + primary_sample_names.append( + self.this_trait.data[sample].name) + primary_sample_names.remove( + self.this_trait.data[sample].name2) all_samples_set = set(all_samples_ordered) if sample not in all_samples_set: all_samples_ordered.append(sample) other_sample_names.append(sample) - #ZS: CFW is here because the .geno file doesn't properly contain its full list of samples. This should probably be fixed. + # ZS: CFW is here because the .geno file doesn't properly + # contain its full list of samples. This should probably + # be fixed. if self.dataset.group.species == "human" or (set(primary_sample_names) == set(parent_f1_samples)) or self.dataset.group.name == "CFW": primary_sample_names += other_sample_names other_sample_names = [] @@ -431,64 +488,77 @@ class ShowTrait(object): else: primary_header = "Samples" - primary_samples = SampleList(dataset = self.dataset, - sample_names=primary_sample_names, - this_trait=self.this_trait, - sample_group_type='primary', - header=primary_header) + primary_samples = SampleList(dataset=self.dataset, + sample_names=primary_sample_names, + this_trait=self.this_trait, + sample_group_type='primary', + header=primary_header) - #if other_sample_names and self.dataset.group.species != "human" and self.dataset.group.name != "CFW": + # if other_sample_names and self.dataset.group.species != + # "human" and self.dataset.group.name != "CFW": if len(other_sample_names) > 0: - other_sample_names.sort() #Sort other samples + other_sample_names.sort() # Sort other samples if parent_f1_samples: other_sample_names = parent_f1_samples + other_sample_names other_samples = SampleList(dataset=self.dataset, - sample_names=other_sample_names, - this_trait=self.this_trait, - sample_group_type='other', - header="Other") + sample_names=other_sample_names, + this_trait=self.this_trait, + sample_group_type='other', + header="Other") self.sample_groups = (primary_samples, other_samples) else: self.sample_groups = (primary_samples,) else: - primary_samples = SampleList(dataset = self.dataset, - sample_names=primary_sample_names, - this_trait=self.trait_vals, - sample_group_type='primary', - header="%s Only" % (self.dataset.group.name)) + primary_samples = SampleList(dataset=self.dataset, + sample_names=primary_sample_names, + this_trait=self.trait_vals, + sample_group_type='primary', + header="%s Only" % (self.dataset.group.name)) self.sample_groups = (primary_samples,) self.primary_sample_names = primary_sample_names self.dataset.group.allsamples = all_samples_ordered -def quantile_normalize_vals(sample_groups): +def get_trait_vals(sample_list): + trait_vals = [] + for sample in sample_list: + try: + trait_vals.append(float(sample.value)) + except: + continue + + return trait_vals + +def get_max_digits(trait_vals): + max_digits = [] + for these_vals in trait_vals: + max_val = max(these_vals) + digits = len(str(max_val)) + max_digits.append(digits - 1) + + return max_digits + +def quantile_normalize_vals(sample_groups, trait_vals): def normf(trait_vals): ranked_vals = ss.rankdata(trait_vals) p_list = [] for i, val in enumerate(trait_vals): - p_list.append(((i+1) - 0.5)/len(trait_vals)) + p_list.append(((i + 1) - 0.5) / len(trait_vals)) z = ss.norm.ppf(p_list) normed_vals = [] for rank in ranked_vals: - normed_vals.append("%0.3f" % z[int(rank)-1]) + normed_vals.append("%0.3f" % z[int(rank) - 1]) return normed_vals qnorm_by_group = [] - for sample_type in sample_groups: - trait_vals = [] - for sample in sample_type.sample_list: - try: - trait_vals.append(float(sample.value)) - except: - continue - - qnorm_vals = normf(trait_vals) + for i, sample_type in enumerate(sample_groups): + qnorm_vals = normf(trait_vals[i]) qnorm_vals_with_x = [] counter = 0 for sample in sample_type.sample_list: @@ -503,17 +573,10 @@ def quantile_normalize_vals(sample_groups): return qnorm_by_group -def get_z_scores(sample_groups): +def get_z_scores(sample_groups, trait_vals): zscore_by_group = [] - for sample_type in sample_groups: - trait_vals = [] - for sample in sample_type.sample_list: - try: - trait_vals.append(float(sample.value)) - except: - continue - - zscores = ss.mstats.zscore(np.array(trait_vals)).tolist() + for i, sample_type in enumerate(sample_groups): + zscores = ss.mstats.zscore(np.array(trait_vals[i])).tolist() zscores_with_x = [] counter = 0 for sample in sample_type.sample_list: @@ -531,20 +594,20 @@ def get_z_scores(sample_groups): def get_nearest_marker(this_trait, this_db): this_chr = this_trait.locus_chr this_mb = this_trait.locus_mb - #One option is to take flanking markers, another is to take the two (or one) closest + # One option is to take flanking markers, another is to take the + # two (or one) closest query = """SELECT Geno.Name FROM Geno, GenoXRef, GenoFreeze WHERE Geno.Chr = '{}' AND GenoXRef.GenoId = Geno.Id AND GenoFreeze.Id = GenoXRef.GenoFreezeId AND GenoFreeze.Name = '{}' - ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name+"Geno", this_mb) + ORDER BY ABS( Geno.Mb - {}) LIMIT 1""".format(this_chr, this_db.group.name + "Geno", this_mb) logger.sql(query) result = g.db.execute(query).fetchall() if result == []: return "" - #return "", "" else: return result[0][0] @@ -559,9 +622,7 @@ def get_table_widths(sample_groups, sample_column_width, has_num_cases=False): trait_table_width += 80 if has_num_cases: trait_table_width += 80 - trait_table_width += len(sample_groups[0].attributes)*88 - - trait_table_width = str(trait_table_width) + "px" + trait_table_width += len(sample_groups[0].attributes) * 88 return stats_table_width, trait_table_width @@ -570,7 +631,7 @@ def has_num_cases(this_trait): has_n = False if this_trait.dataset.type != "ProbeSet" and this_trait.dataset.type != "Geno": for name, sample in list(this_trait.data.items()): - if sample.num_cases: + if sample.num_cases and sample.num_cases != "1": has_n = True break @@ -609,10 +670,13 @@ def check_if_attr_exists(the_trait, id_type): def get_ncbi_summary(this_trait): if check_if_attr_exists(this_trait, 'geneid'): - #ZS: Need to switch this try/except to something that checks the output later + # ZS: Need to switch this try/except to something that checks + # the output later try: - response = requests.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid) - summary = json.loads(response.content)['result'][this_trait.geneid]['summary'] + response = requests.get( + "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid) + summary = json.loads(response.content)[ + 'result'][this_trait.geneid]['summary'] return summary except: return None @@ -620,20 +684,12 @@ def get_ncbi_summary(this_trait): return None -def get_categorical_variables(this_trait, sample_list): +def get_categorical_variables(this_trait, sample_list) -> list: categorical_var_list = [] if len(sample_list.attributes) > 0: for attribute in sample_list.attributes: - attribute_vals = [] - for sample_name in list(this_trait.data.keys()): - if sample_list.attributes[attribute].name in this_trait.data[sample_name].extra_attributes: - attribute_vals.append(this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name]) - else: - attribute_vals.append("N/A") - num_distinct = len(set(attribute_vals)) - - if num_distinct < 10: + if len(sample_list.attributes[attribute].distinct_values) < 10: categorical_var_list.append(sample_list.attributes[attribute].name) return categorical_var_list @@ -644,7 +700,8 @@ def get_genotype_scales(genofiles): if isinstance(genofiles, list): for the_file in genofiles: file_location = the_file['location'] - geno_scales[file_location] = get_scales_from_genofile(file_location) + geno_scales[file_location] = get_scales_from_genofile( + file_location) else: geno_scales[genofiles] = get_scales_from_genofile(genofiles) @@ -653,8 +710,8 @@ def get_genotype_scales(genofiles): def get_scales_from_genofile(file_location): geno_path = locate_ignore_error(file_location, 'genotype') - - if not geno_path: #ZS: This is just to allow the code to run when + # ZS: This is just to allow the code to run when + if not geno_path: return [["physic", "Mb"]] cm_and_mb_cols_exist = True cm_column = None @@ -662,7 +719,9 @@ def get_scales_from_genofile(file_location): with open(geno_path, "r") as geno_fh: for i, line in enumerate(geno_fh): if line[0] == "#" or line[0] == "@": - if "@scale" in line: #ZS: If the scale is made explicit in the metadata, use that + # ZS: If the scale is made explicit in the metadata, + # use that + if "@scale" in line: scale = line.split(":")[1].strip() if scale == "morgan": return [["morgan", "cM"]] @@ -682,12 +741,16 @@ def get_scales_from_genofile(file_location): mb_column = 3 break - #ZS: This attempts to check whether the cM and Mb columns are 'real', since some .geno files have one column be a copy of the other column, or have one column that is all 0s + # ZS: This attempts to check whether the cM and Mb columns are + # 'real', since some .geno files have one column be a copy of + # the other column, or have one column that is all 0s cm_all_zero = True mb_all_zero = True cm_mb_all_equal = True for i, line in enumerate(geno_fh): - if first_marker_line <= i < first_marker_line + 10: #ZS: I'm assuming there won't be more than 10 markers where the position is listed as 0 + # ZS: I'm assuming there won't be more than 10 markers + # where the position is listed as 0 + if first_marker_line <= i < first_marker_line + 10: if cm_column: cm_val = line.split("\t")[cm_column].strip() if cm_val != "0": @@ -703,8 +766,8 @@ def get_scales_from_genofile(file_location): if i > first_marker_line + 10: break - - #ZS: This assumes that both won't be all zero, since if that's the case mapping shouldn't be an option to begin with + # ZS: This assumes that both won't be all zero, since if that's + # the case mapping shouldn't be an option to begin with if mb_all_zero: return [["morgan", "cM"]] elif cm_mb_all_equal: diff --git a/wqflask/wqflask/snp_browser/snp_browser.py b/wqflask/wqflask/snp_browser/snp_browser.py index a52399a2..c4d0e135 100644 --- a/wqflask/wqflask/snp_browser/snp_browser.py +++ b/wqflask/wqflask/snp_browser/snp_browser.py @@ -9,7 +9,8 @@ logger = getLogger(__name__) from base import species from base import webqtlConfig -class SnpBrowser(object): + +class SnpBrowser: def __init__(self, start_vars): self.strain_lists = get_browser_sample_lists() @@ -26,9 +27,11 @@ class SnpBrowser(object): self.table_rows = [] if self.limit_strains == "true": - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.chosen_strains, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.chosen_strains, empty_columns=self.empty_columns) else: - self.header_fields, self.empty_field_count, self.header_data_names = get_header_list(variant_type = self.variant_type, strains = self.strain_lists, species = self.species_name, empty_columns = self.empty_columns) + self.header_fields, self.empty_field_count, self.header_data_names = get_header_list( + variant_type=self.variant_type, strains=self.strain_lists, species=self.species_name, empty_columns=self.empty_columns) def initialize_parameters(self, start_vars): if 'first_run' in start_vars: @@ -52,10 +55,12 @@ class SnpBrowser(object): self.rat_chr_list = [] mouse_species_ob = species.TheSpecies(species_name="Mouse") for key in mouse_species_ob.chromosomes.chromosomes: - self.mouse_chr_list.append(mouse_species_ob.chromosomes.chromosomes[key].name) + self.mouse_chr_list.append( + mouse_species_ob.chromosomes.chromosomes[key].name) rat_species_ob = species.TheSpecies(species_name="Rat") for key in rat_species_ob.chromosomes.chromosomes: - self.rat_chr_list.append(rat_species_ob.chromosomes.chromosomes[key].name) + self.rat_chr_list.append( + rat_species_ob.chromosomes.chromosomes[key].name) if self.species_id == 1: self.this_chr_list = self.mouse_chr_list @@ -108,9 +113,11 @@ class SnpBrowser(object): "CAST/EiJ"] self.chosen_strains_rat = ["BN", "F344", "WLI", "WMI"] if 'chosen_strains_mouse' in start_vars: - self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split(",") + self.chosen_strains_mouse = start_vars['chosen_strains_mouse'].split( + ",") if 'chosen_strains_rat' in start_vars: - self.chosen_strains_rat = start_vars['chosen_strains_rat'].split(",") + self.chosen_strains_rat = start_vars['chosen_strains_rat'].split( + ",") if self.species_id == 1: self.chosen_strains = self.chosen_strains_mouse @@ -149,9 +156,11 @@ class SnpBrowser(object): if self.gene_name != "": if self.species_id != 0: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % (self.species_id, self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE SpeciesId = %s AND geneSymbol = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % (self.gene_name) + query = "SELECT geneSymbol, chromosome, txStart, txEnd FROM GeneList WHERE geneSymbol = '%s'" % ( + self.gene_name) result = g.db.execute(query).fetchone() if result: self.gene_name, self.chr, self.start_mb, self.end_mb = result @@ -162,9 +171,11 @@ class SnpBrowser(object): query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll WHERE Rs = '%s'" % self.gene_name else: if self.species_id != 0: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SpeciesId = %s AND SnpName = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Position, Position+0.000001 FROM SnpAll where SnpName = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -176,9 +187,11 @@ class SnpBrowser(object): elif self.variant_type == "InDel": if self.gene_name[0] == "I": if self.species_id != 0: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % (self.species_id, self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE SpeciesId = %s AND Name = '%s'" % ( + self.species_id, self.gene_name) else: - query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % (self.gene_name) + query = "SELECT Id, Chromosome, Mb_start, Mb_end FROM IndelAll WHERE Name = '%s'" % ( + self.gene_name) result_snp = g.db.execute(query).fetchall() if result_snp: self.snp_list = [item[0] for item in result_snp] @@ -249,12 +262,13 @@ class SnpBrowser(object): def filter_results(self, results): filtered_results = [] - strain_index_list = [] #ZS: List of positions of selected strains in strain list + strain_index_list = [] # ZS: List of positions of selected strains in strain list last_mb = -1 if self.limit_strains == "true" and len(self.chosen_strains) > 0: for item in self.chosen_strains: - index = self.strain_lists[self.species_name.lower()].index(item) + index = self.strain_lists[self.species_name.lower()].index( + item) strain_index_list.append(index) for seq, result in enumerate(results): @@ -262,7 +276,8 @@ class SnpBrowser(object): if self.variant_type == "SNP": display_strains = [] - snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[:10] + snp_id, species_id, snp_name, rs, chr, mb, mb_2016, alleles, snp_source, conservation_score = result[ + :10] effect_list = result[10:28] if self.species_id == 1: self.allele_list = result[30:] @@ -272,13 +287,14 @@ class SnpBrowser(object): if self.limit_strains == "true" and len(self.chosen_strains) > 0: for index in strain_index_list: if self.species_id == 1: - display_strains.append(result[29+index]) + display_strains.append(result[29 + index]) elif self.species_id == 2: - display_strains.append(result[31+index]) + display_strains.append(result[31 + index]) self.allele_list = display_strains effect_info_dict = get_effect_info(effect_list) - coding_domain_list = ['Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + coding_domain_list = ['Start Gained', 'Start Lost', + 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] intron_domain_list = ['Splice Site', 'Nonsplice Site'] for key in effect_info_dict: @@ -295,19 +311,22 @@ class SnpBrowser(object): if 'Intergenic' in domain: if self.gene_name != "": - gene_id = get_gene_id(self.species_id, self.gene_name) + gene_id = get_gene_id( + self.species_id, self.gene_name) gene = [gene_id, self.gene_name] else: gene = check_if_in_gene(species_id, chr, mb) transcript = exon = function = function_details = '' - if self.redundant == "false" or last_mb != mb: # filter redundant + if self.redundant == "false" or last_mb != mb: # filter redundant if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb else: - gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[key] + gene_list, transcript_list, exon_list, function_list, function_details_list = effect_info_dict[ + key] for index, item in enumerate(gene_list): gene = item transcript = transcript_list[index] @@ -324,13 +343,15 @@ class SnpBrowser(object): function = "" if function_details_list: - function_details = "Biotype: " + function_details_list[index] + function_details = "Biotype: " + \ + function_details_list[index] else: function_details = "" if self.redundant == "false" or last_mb != mb: if self.include_record(domain, function, snp_source, conservation_score): - info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id] + info_list = [snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, + function, function_details, snp_source, conservation_score, snp_id] info_list.extend(self.allele_list) filtered_results.append(info_list) last_mb = mb @@ -344,7 +365,8 @@ class SnpBrowser(object): gene = "No Gene" domain = conservation_score = snp_id = snp_name = rs = flank_3 = flank_5 = ncbi = function = "" if self.include_record(domain, function, source_name, conservation_score): - filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, indel_strand, indel_type, indel_size, indel_sequence, source_name]) + filtered_results.append([indel_name, indel_chr, indel_mb_start, indel_mb_end, + indel_strand, indel_type, indel_size, indel_sequence, source_name]) last_mb = indel_mb_start else: @@ -364,9 +386,10 @@ class SnpBrowser(object): if gene_name and (gene_name not in gene_name_list): gene_name_list.append(gene_name) if len(gene_name_list) > 0: - gene_id_name_dict = get_gene_id_name_dict(self.species_id, gene_name_list) + gene_id_name_dict = get_gene_id_name_dict( + self.species_id, gene_name_list) - #ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) + # ZS: list of booleans representing which columns are entirely empty, so they aren't displayed on the page; only including ones that are sometimes empty (since there's always a location, etc) self.empty_columns = { "snp_source": "false", "conservation_score": "false", @@ -382,20 +405,23 @@ class SnpBrowser(object): for i, result in enumerate(self.filtered_results): this_row = {} if self.variant_type == "SNP": - snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[:14] + snp_name, rs, chr, mb, alleles, gene, transcript, exon, domain, function, function_details, snp_source, conservation_score, snp_id = result[ + :14] allele_value_list = result[14:] if rs: snp_url = webqtlConfig.DBSNP % (rs) snp_name = rs else: rs = "" - start_bp = int(mb*1000000 - 100) - end_bp = int(mb*1000000 + 100) + start_bp = int(mb * 1000000 - 100) + end_bp = int(mb * 1000000 + 100) position_info = "chr%s:%d-%d" % (chr, start_bp, end_bp) if self.species_id == 2: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("rn6", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "rn6", position_info) else: - snp_url = webqtlConfig.GENOMEBROWSER_URL % ("mm10", position_info) + snp_url = webqtlConfig.GENOMEBROWSER_URL % ( + "mm10", position_info) mb = float(mb) mb_formatted = "%2.6f" % mb @@ -428,13 +454,14 @@ class SnpBrowser(object): gene_link = "" if transcript: - transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % (transcript) + transcript_link = webqtlConfig.ENSEMBLETRANSCRIPT_URL % ( + transcript) self.empty_columns['transcript'] = "true" else: transcript_link = "" if exon: - exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank + exon = exon[1] # exon[0] is exon_id, exon[1] is exon_rank self.empty_columns['exon'] = "true" else: exon = "" @@ -459,20 +486,20 @@ class SnpBrowser(object): function_list = function_details.strip().split(",") function_list = [item.strip() for item in function_list] function_list[0] = function_list[0].title() - function_details = ", ".join(item for item in function_list) + function_details = ", ".join( + item for item in function_list) function_details = function_details.replace("_", " ") function_details = function_details.replace("/", " -> ") if function_details == "Biotype: Protein Coding": function_details = function_details + ", Coding Region Unknown" self.empty_columns['function_details'] = "true" - + #[snp_href, chr, mb_formatted, alleles, snp_source_cell, conservation_score, gene_name_cell, transcript_href, exon, domain_1, domain_2, function, function_details] - base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", + base_color_dict = {"A": "#C33232", "C": "#1569C7", "T": "#CFCF32", "G": "#32C332", "t": "#FF6", "c": "#5CB3FF", "a": "#F66", "g": "#CF9", ":": "#FFFFFF", "-": "#FFFFFF", "?": "#FFFFFF"} - the_bases = [] for j, item in enumerate(allele_value_list): if item and isinstance(item, str): @@ -575,7 +602,7 @@ class SnpBrowser(object): if conservation_score: score_as_float = float(conservation_score) try: - input_score_float = float(self.score) # the user-input score + input_score_float = float(self.score) # the user-input score except: input_score_float = 0.0 @@ -628,30 +655,31 @@ class SnpBrowser(object): left_offset, right_offset, top_offset, bottom_offset = (30, 30, 40, 50) plot_width = canvas_width - left_offset - right_offset plot_height = canvas_height - top_offset - bottom_offset - y_zero = top_offset + plot_height/2 + y_zero = top_offset + plot_height / 2 - x_scale = plot_width/(self.end_mb - self.start_mb) + x_scale = plot_width / (self.end_mb - self.start_mb) - #draw clickable image map at some point + # draw clickable image map at some point n_click = 80.0 - click_step = plot_width/n_click - click_mb_step = (self.end_mb - self.start_mb)/n_click + click_step = plot_width / n_click + click_mb_step = (self.end_mb - self.start_mb) / n_click - #for i in range(n_click): + # for i in range(n_click): # href = url_for('snp_browser', first_run="false", chosen_strains_mouse=self.chosen_strains_mouse, chosen_strains_rat=self.chosen_strains_rat, variant=self.variant_type, species=self.species_name, gene_name=self.gene_name, chr=self.chr, start_mb=self.start_mb, end_mb=self.end_mb, limit_strains=self.limit_strains, domain=self.domain, function=self.function, criteria=self.criteria, score=self.score, diff_alleles=self.diff_alleles) + def get_browser_sample_lists(species_id=1): strain_lists = {} mouse_strain_list = [] query = "SHOW COLUMNS FROM SnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[1:]: mouse_strain_list.append(result[0]) rat_strain_list = [] query = "SHOW COLUMNS FROM RatSnpPattern;" - results = g.db.execute(query).fetchall(); + results = g.db.execute(query).fetchall() for result in results[2:]: rat_strain_list.append(result[0]) @@ -660,7 +688,8 @@ def get_browser_sample_lists(species_id=1): return strain_lists -def get_header_list(variant_type, strains, species = None, empty_columns = None): + +def get_header_list(variant_type, strains, species=None, empty_columns=None): if species == "Mouse": strain_list = strains['mouse'] elif species == "Rat": @@ -668,13 +697,15 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) else: strain_list = strains - empty_field_count = 0 #ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this + empty_field_count = 0 # ZS: This is an awkward way of letting the javascript know the index where the allele value columns start; there's probably a better way of doing this header_fields = [] header_data_names = [] if variant_type == "SNP": - header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) - header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] + header_fields.append(['Index', 'SNP ID', 'Chr', 'Mb', 'Alleles', 'Source', 'ConScore', + 'Gene', 'Transcript', 'Exon', 'Domain 1', 'Domain 2', 'Function', 'Details']) + header_data_names = ['index', 'snp_name', 'chr', 'mb_formatted', 'alleles', 'snp_source', 'conservation_score', + 'gene_name', 'transcript', 'exon', 'domain_1', 'domain_2', 'function', 'function_details'] header_fields.append(strain_list) header_data_names += strain_list @@ -704,18 +735,21 @@ def get_header_list(variant_type, strains, species = None, empty_columns = None) if empty_columns['function_details'] == "false": empty_field_count += 1 header_fields[0].remove('Details') - + for col in empty_columns.keys(): if empty_columns[col] == "false": header_data_names.remove(col) elif variant_type == "InDel": - header_fields = ['Index', 'ID', 'Type', 'InDel Chr', 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] - header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] + header_fields = ['Index', 'ID', 'Type', 'InDel Chr', + 'Mb Start', 'Mb End', 'Strand', 'Size', 'Sequence', 'Source'] + header_data_names = ['index', 'indel_name', 'indel_type', 'indel_chr', 'indel_mb_s', + 'indel_mb_e', 'indel_strand', 'indel_size', 'indel_sequence', 'source_name'] return header_fields, empty_field_count, header_data_names -def get_effect_details_by_category(effect_name = None, effect_value = None): + +def get_effect_details_by_category(effect_name=None, effect_value=None): gene_list = [] transcript_list = [] exon_list = [] @@ -723,10 +757,13 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): function_detail_list = [] tmp_list = [] - gene_group_list = ['Upstream', 'Downstream', 'Splice Site', 'Nonsplice Site', '3\' UTR'] - biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + gene_group_list = ['Upstream', 'Downstream', + 'Splice Site', 'Nonsplice Site', '3\' UTR'] + biotype_group_list = ['Unknown Effect In Exon', 'Start Gained', + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] new_codon_group_list = ['Start Gained'] - codon_effect_group_list = ['Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] + codon_effect_group_list = [ + 'Start Lost', 'Stop Gained', 'Stop Lost', 'Nonsynonymous', 'Synonymous'] effect_detail_list = effect_value.strip().split('|') effect_detail_list = [item.strip() for item in effect_detail_list] @@ -764,13 +801,16 @@ def get_effect_details_by_category(effect_name = None, effect_value = None): return [gene_list, transcript_list, exon_list, function_list, function_detail_list] + def get_effect_info(effect_list): domain = "" effect_detail_list = [] effect_info_dict = {} - prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[:8] - exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[8:16] + prime3_utr, prime5_utr, upstream, downstream, intron, nonsplice_site, splice_site, intergenic = effect_list[ + :8] + exon, non_synonymous_coding, synonymous_coding, start_gained, start_lost, stop_gained, stop_lost, unknown_effect_in_exon = effect_list[ + 8:16] if intergenic: domain = "Intergenic" @@ -779,63 +819,77 @@ def get_effect_info(effect_list): # if not exon, get gene list/transcript list info if upstream: domain = "Upstream" - effect_detail_list = get_effect_details_by_category(effect_name='Upstream', effect_value=upstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Upstream', effect_value=upstream) effect_info_dict[domain] = effect_detail_list if downstream: domain = "Downstream" - effect_detail_list = get_effect_details_by_category(effect_name='Downstream', effect_value=downstream) + effect_detail_list = get_effect_details_by_category( + effect_name='Downstream', effect_value=downstream) effect_info_dict[domain] = effect_detail_list if intron: if splice_site: domain = "Splice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Splice Site', effect_value=splice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Splice Site', effect_value=splice_site) effect_info_dict[domain] = effect_detail_list if nonsplice_site: domain = "Nonsplice Site" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsplice Site', effect_value=nonsplice_site) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsplice Site', effect_value=nonsplice_site) effect_info_dict[domain] = effect_detail_list # get gene, transcript_list, and exon info if prime3_utr: domain = "3\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='3\' UTR', effect_value=prime3_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='3\' UTR', effect_value=prime3_utr) effect_info_dict[domain] = effect_detail_list if prime5_utr: domain = "5\' UTR" - effect_detail_list = get_effect_details_by_category(effect_name='5\' UTR', effect_value=prime5_utr) + effect_detail_list = get_effect_details_by_category( + effect_name='5\' UTR', effect_value=prime5_utr) effect_info_dict[domain] = effect_detail_list if start_gained: domain = "Start Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Start Gained', effect_value=start_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Gained', effect_value=start_gained) effect_info_dict[domain] = effect_detail_list if unknown_effect_in_exon: domain = "Unknown Effect In Exon" - effect_detail_list = get_effect_details_by_category(effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) + effect_detail_list = get_effect_details_by_category( + effect_name='Unknown Effect In Exon', effect_value=unknown_effect_in_exon) effect_info_dict[domain] = effect_detail_list if start_lost: domain = "Start Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Start Lost', effect_value=start_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Start Lost', effect_value=start_lost) effect_info_dict[domain] = effect_detail_list if stop_gained: domain = "Stop Gained" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Gained', effect_value=stop_gained) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Gained', effect_value=stop_gained) effect_info_dict[domain] = effect_detail_list if stop_lost: domain = "Stop Lost" - effect_detail_list = get_effect_details_by_category(effect_name='Stop Lost', effect_value=stop_lost) + effect_detail_list = get_effect_details_by_category( + effect_name='Stop Lost', effect_value=stop_lost) effect_info_dict[domain] = effect_detail_list if non_synonymous_coding: domain = "Nonsynonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Nonsynonymous', effect_value=non_synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Nonsynonymous', effect_value=non_synonymous_coding) effect_info_dict[domain] = effect_detail_list if synonymous_coding: domain = "Synonymous" - effect_detail_list = get_effect_details_by_category(effect_name='Synonymous', effect_value=synonymous_coding) + effect_detail_list = get_effect_details_by_category( + effect_name='Synonymous', effect_value=synonymous_coding) effect_info_dict[domain] = effect_detail_list return effect_info_dict + def get_gene_id(species_id, gene_name): query = """ SELECT @@ -853,11 +907,13 @@ def get_gene_id(species_id, gene_name): else: return "" + def get_gene_id_name_dict(species_id, gene_name_list): gene_id_name_dict = {} if len(gene_name_list) == 0: return "" - gene_name_str_list = ["'" + gene_name + "'" for gene_name in gene_name_list] + gene_name_str_list = ["'" + gene_name + \ + "'" for gene_name in gene_name_list] gene_name_str = ",".join(gene_name_str_list) query = """ @@ -877,8 +933,9 @@ def get_gene_id_name_dict(species_id, gene_name_list): return gene_id_name_dict + def check_if_in_gene(species_id, chr, mb): - if species_id != 0: #ZS: Check if this is necessary + if species_id != 0: # ZS: Check if this is necessary query = """SELECT geneId, geneSymbol FROM GeneList WHERE SpeciesId = {0} AND chromosome = '{1}' AND @@ -895,4 +952,3 @@ def check_if_in_gene(species_id, chr, mb): return [result[0], result[1]] else: return "" - diff --git a/wqflask/wqflask/static/new/javascript/dataset_select_menu_edit_trait.js b/wqflask/wqflask/static/new/javascript/dataset_select_menu_edit_trait.js new file mode 100644 index 00000000..1d4a94d9 --- /dev/null +++ b/wqflask/wqflask/static/new/javascript/dataset_select_menu_edit_trait.js @@ -0,0 +1,253 @@ +var apply_default, check_search_term, dataset_info, group_info, make_default, open_window, populate_dataset, populate_group, populate_species, populate_type, process_json, redo_dropdown; +process_json = function(data) { + window.jdata = data; + populate_species(); + if ($('#type').length > 0) { //This is to determine if it's the index page or the submit_trait page (which only has species and group selection and no make default option) + return apply_default(); + } +}; + +$.ajax('/api/v_pre1/gen_dropdown', { + dataType: 'json', + success: process_json +}); + +populate_species = function() { + var species_list; + species_list = this.jdata.species; + redo_dropdown($('#species'), species_list); + return populate_group(); +}; +window.populate_species = populate_species; +populate_group = function() { + var group_list, species; + console.log("in populate group"); + species = $('#species').val(); + group_list = this.jdata.groups[species]; + for (_i = 0, _len = group_list.length; _i < (_len - 1); _i++) { + if (group_list[_i][0] == "BXD300"){ + group_list.splice(_i, 1) + } + } + redo_dropdown($('#group'), group_list); + if ($('#type').length > 0) { //This is to determine if it's the index page or the submit_trait page (which only has species and group selection and no make default option) + return populate_type(); + } +}; +window.populate_group = populate_group; +populate_type = function() { + var group, species, type_list; + console.log("in populate type"); + species = $('#species').val(); + group = $('#group').val(); + type_list = this.jdata.types[species][group]; + redo_dropdown($('#type'), type_list); + return populate_dataset(); +}; +window.populate_type = populate_type; +populate_dataset = function() { + var dataset_list, group, species, type; + console.log("in populate dataset"); + species = $('#species').val(); + group = $('#group').val(); + type = $('#type').val(); + console.log("sgt:", species, group, type); + dataset_list = this.jdata.datasets[species][group][type]; + console.log("pop_dataset:", dataset_list); + return redo_dropdown($('#dataset'), dataset_list); +}; +window.populate_dataset = populate_dataset; +redo_dropdown = function(dropdown, items) { + var item, _i, _len, _results; + console.log("in redo:", dropdown, items); + dropdown.empty(); + _results = []; + + if (dropdown.attr('id') == "group"){ + group_family_list = []; + for (_i = 0, _len = items.length; _i < _len; _i++) { + item = items[_i]; + group_family = item[2].toString().split(":")[1] + group_family_list.push([item[0], item[1], group_family]) + } + + current_family = "" + this_opt_group = null + for (_i = 0, _len = group_family_list.length; _i < _len; _i++) { + item = group_family_list[_i]; + if (item[2] != "None" && current_family == ""){ + current_family = item[2] + this_opt_group = $("<optgroup label=\"" + item[2] + "\">") + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + } else if (current_family != "" && item[2] == current_family){ + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + if (_i == group_family_list.length - 1){ + _results.push(dropdown.append(this_opt_group)) + } + } else if (current_family != "" && item[2] != current_family && item[2] != "None"){ + current_family = item[2] + _results.push(dropdown.append(this_opt_group)) + this_opt_group = $("<optgroup label=\"" + current_family + "\">") + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + if (_i == group_family_list.length - 1){ + _results.push(dropdown.append(this_opt_group)) + } + } else if (current_family != "" && this_opt_group != null && item[2] == "None"){ + _results.push(dropdown.append(this_opt_group)) + current_family = "" + _results.push(dropdown.append($("<option />").val(item[0]).text(item[1]))); + } else { + _results.push(dropdown.append($("<option />").val(item[0]).text(item[1]))); + } + } + } else if (dropdown.attr('id') == "type"){ + type_family_list = []; + for (_i = 0, _len = items.length; _i < _len; _i++) { + item = items[_i]; + type_family_list.push([item[0], item[1], item[2]]) + } + + current_family = "" + this_opt_group = null + for (_i = 0, _len = type_family_list.length; _i < _len; _i++) { + item = type_family_list[_i]; + if (item[2] != "None" && current_family == ""){ + current_family = item[2] + this_opt_group = $("<optgroup label=\"" + item[2] + "\">") + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + if (_i == type_family_list.length - 1){ + _results.push(dropdown.append(this_opt_group)) + } + } else if (current_family != "" && item[2] == current_family){ + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + if (_i == type_family_list.length - 1){ + _results.push(dropdown.append(this_opt_group)) + } + } else if (current_family != "" && item[2] != current_family && item[2] != "None"){ + current_family = item[2] + _results.push(dropdown.append(this_opt_group)) + this_opt_group = $("<optgroup label=\"" + current_family + "\">") + this_opt_group.append($("<option />").val(item[0]).text(item[1])); + if (_i == type_family_list.length - 1){ + _results.push(dropdown.append(this_opt_group)) + } + } else { + _results.push(dropdown.append(this_opt_group)) + current_family = "" + _results.push(dropdown.append($("<option />").val(item[0]).text(item[1]))); + } + } + } else { + for (_i = 0, _len = items.length; _i < _len; _i++) { + item = items[_i]; + if (item.length > 2){ + _results.push(dropdown.append($("<option data-id=\""+item[0]+"\" />").val(item[1]).text(item[2]))); + } else { + _results.push(dropdown.append($("<option />").val(item[0]).text(item[1]))); + } + } + } + + return _results; +}; +$('#species').change((function(_this) { + return function() { + return populate_group(); + }; +})(this)); +$('#group').change((function(_this) { + return function() { + if ($('#type').length > 0) { //This is to determine if it's the index page or the submit_trait page (which only has species and group selection and no make default option) + return populate_type(); + } + else { + return false + } + }; +})(this)); +$('#type').change((function(_this) { + return function() { + return populate_dataset(); + }; +})(this)); +open_window = function(url, name) { + var options; + options = "menubar=yes,toolbar=yes,titlebar=yes,location=yes,resizable=yes,status=yes,scrollbars=yes,directories=yes,width=900"; + return open(url, name, options).focus(); +}; +group_info = function() { + var group, species, url; + species = $('#species').val(); + group = $('#group').val(); + url = "http://gn1.genenetwork.org/" + species + "Cross.html#" + group; + return open_window(url, "Group Info"); +}; +$('#group_info').click(group_info); +dataset_info = function() { + var dataset, url; + accession_id = $('#dataset option:selected').data("id"); + name = $('#dataset option:selected').val(); + if (accession_id != "None") { + url = "http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + accession_id + "&InfoPageName=" + name; + } else { + url = "http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName=" + name; + } + return open_window(url, "Dataset Info"); +}; +$('#dataset_info').click(dataset_info); +make_default = function() { + var holder, item, jholder, _i, _len, _ref; + alert("The current settings are now your default.") + holder = {}; + _ref = ['species', 'group', 'type', 'dataset']; + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + item = _ref[_i]; + holder[item] = $("#" + item).val(); + } + jholder = JSON.stringify(holder); + return $.cookie('search_defaults', jholder, { + expires: 365 + }); +}; +apply_default = function() { + var defaults, item, populate_function, _i, _len, _ref, _results; + defaults = $.cookie('search_defaults'); + if (defaults) { + defaults = $.parseJSON(defaults); + } else { + defaults = { + species: "mouse", + group: "BXD", + type: "Hippocampus mRNA", + dataset: "HC_M2_0606_P" + }; + } + + _ref = [['species', 'group'], ['group', 'type'], ['type', 'dataset'], ['dataset', null]]; + _results = []; + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + item = _ref[_i]; + $("#" + item[0]).val(defaults[item[0]]); + if (item[1]) { + populate_function = "populate_" + item[1]; + console.log("Calling:", populate_function); + _results.push(window[populate_function]()); + } else { + _results.push(void 0); + } + } + return _results; +}; +check_search_term = function() { + var or_search_term, and_search_term; + or_search_term = $('#or_search').val(); + and_search_term = $('#and_search').val(); + console.log("or_search_term:", or_search_term); + console.log("and_search_term:", and_search_term); + if (or_search_term === "" && and_search_term === "") { + alert("Please enter one or more search terms or search equations."); + return false; + } +}; +$("#make_default").click(make_default); +$("#btsearch").click(check_search_term); diff --git a/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js b/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js index 05e4d547..6ca92fb6 100644 --- a/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js +++ b/wqflask/wqflask/static/new/javascript/initialize_show_trait_tables.js @@ -40,9 +40,9 @@ build_columns = function() { 'data': null, 'render': function(data, type, row, meta) { if (data.value == null) { - return '<input type="text" data-value="x" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" style="text-align: right;" class="trait_value_input edit_sample_value" value="x" size=6 maxlength=6>' + return '<input type="text" data-value="x" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" style="text-align: right;" class="trait_value_input edit_sample_value" value="x" size=' + js_data.max_digits[0] + '>' } else { - return '<input type="text" data-value="' + data.value.toFixed(3) + '" data-qnorm="' + js_data['qnorm_values'][0][parseInt(data.this_id) - 1] + '" data-zscore="' + js_data['zscore_values'][0][parseInt(data.this_id) - 1] + '" name="value:' + data.name + '" class="trait_value_input edit_sample_value" value="' + data.value.toFixed(3) + '" size=6 maxlength=6>' + return '<input type="text" data-value="' + data.value.toFixed(3) + '" data-qnorm="' + js_data['qnorm_values'][0][parseInt(data.this_id) - 1] + '" data-zscore="' + js_data['zscore_values'][0][parseInt(data.this_id) - 1] + '" name="value:' + data.name + '" class="trait_value_input edit_sample_value" value="' + data.value.toFixed(3) + '" size=' + js_data.max_digits[0] + '>' } } } @@ -66,9 +66,9 @@ build_columns = function() { 'data': null, 'render': function(data, type, row, meta) { if (data.variance == null) { - return '<input type="text" data-value="x" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" class="trait_value_input edit_sample_se" value="x" size=6 maxlength=6>' + return '<input type="text" data-value="x" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" class="trait_value_input edit_sample_se" value="x" size=6>' } else { - return '<input type="text" data-value="' + data.variance.toFixed(3) + '" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" class="trait_value_input edit_sample_se" value="' + data.variance.toFixed(3) + '" size=6 maxlength=6>' + return '<input type="text" data-value="' + data.variance.toFixed(3) + '" data-qnorm="x" data-zscore="x" name="value:' + data.name + '" class="trait_value_input edit_sample_se" value="' + data.variance.toFixed(3) + '" size=6>' } } } @@ -150,8 +150,8 @@ var primary_table = $('#samples_primary').DataTable( { } for (i=0; i < attr_keys.length; i++) { - $('td', row).eq(attribute_start_pos + i).addClass("column_name-" + js_data.attributes[attr_keys[i]].name) - $('td', row).eq(attribute_start_pos + i).attr("style", "text-align: " + js_data.attributes[attr_keys[i]].alignment + "; padding-top: 2px; padding-bottom: 0px;") + $('td', row).eq(attribute_start_pos + i + 1).addClass("column_name-" + js_data.attributes[attr_keys[i]].name) + $('td', row).eq(attribute_start_pos + i + 1).attr("style", "text-align: " + js_data.attributes[attr_keys[i]].alignment + "; padding-top: 2px; padding-bottom: 0px;") } }, 'data': js_data['sample_lists'][0], @@ -225,4 +225,4 @@ if (js_data.sample_lists.length > 1){ } ); other_table.draw(); //ZS: This makes the table adjust its height properly on initial load -}
\ No newline at end of file +} diff --git a/wqflask/wqflask/static/new/javascript/network_graph.js b/wqflask/wqflask/static/new/javascript/network_graph.js index 02c3b817..480443ee 100644 --- a/wqflask/wqflask/static/new/javascript/network_graph.js +++ b/wqflask/wqflask/static/new/javascript/network_graph.js @@ -115,7 +115,7 @@ window.onload=function() { correlation_line = '<b>Sample r: ' + this.data().correlation + '</b><br>' p_value_line = 'Sample p(r): ' + this.data().p_value + '<br>' overlap_line = 'Overlap: ' + this.data().overlap + '<br>' - scatter_plot = '<a href="' + gn2_url + '/corr_scatter_plot?dataset_1=' + this.data().source.split(":")[1] + '&dataset_2=' + this.data().target.split(":")[1] + '&trait_1=' + this.data().source.split(":")[0] + '&trait_2=' + this.data().target.split(":")[0] + '" >View Scatterplot</a>' + scatter_plot = '<a href="' + gn2_url + '/corr_scatter_plot?method=pearson&dataset_1=' + this.data().source.split(":")[1] + '&dataset_2=' + this.data().target.split(":")[1] + '&trait_1=' + this.data().source.split(":")[0] + '&trait_2=' + this.data().target.split(":")[0] + '" >View Scatterplot</a>' return correlation_line + p_value_line + overlap_line + scatter_plot }, position: { diff --git a/wqflask/wqflask/static/new/javascript/search_results.js b/wqflask/wqflask/static/new/javascript/search_results.js index ecb1220d..48b9b7be 100644 --- a/wqflask/wqflask/static/new/javascript/search_results.js +++ b/wqflask/wqflask/static/new/javascript/search_results.js @@ -326,4 +326,15 @@ $(function() { } ); } + apply_default = function() { + let default_collection_id = $.cookie('default_collection'); + if (default_collection_id) { + let the_option = $('[name=existing_collection] option').filter(function() { + return ($(this).text().split(":")[0] == default_collection_id); + }) + the_option.prop('selected', true); + } + } + apply_default(); + });
\ No newline at end of file diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index 9d356570..569046d3 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -585,6 +585,16 @@ get_table_contents_for_form_submit = function(form_id) { var corr_input_list = ['sample_vals', 'corr_type', 'primary_samples', 'trait_id', 'dataset', 'group', 'tool_used', 'form_url', 'corr_sample_method', 'corr_samples_group', 'corr_dataset', 'min_expr', 'corr_return_results', 'location_type', 'loc_chr', 'min_loc_mb', 'max_loc_mb', 'p_range_lower', 'p_range_upper'] +$(".test_corr_compute").on("click", (function(_this) { + return function() { + $('input[name=tool_used]').val("Correlation"); + $('input[name=form_url]').val("/test_corr_compute"); + $('input[name=wanted_inputs]').val(corr_input_list.join(",")); + url = "/loading"; + return submit_special(url); + }; +})(this)); + $(".corr_compute").on("click", (function(_this) { return function() { $('input[name=tool_used]').val("Correlation"); @@ -634,7 +644,7 @@ populate_sample_attributes_values_dropdown = function() { return _results; }; -if (Object.keys(js_data.attributes).length){ +if (js_data.categorical_attr_exists == "true"){ populate_sample_attributes_values_dropdown(); } diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 3ae52975..09e9d024 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -143,11 +143,11 @@ $('input[name=display_all]').change((function(_this) { //ZS: This is a list of inputs to be passed to the loading page, since not all inputs on the trait page are relevant to mapping var mapping_input_list = ['temp_uuid', 'trait_id', 'dataset', 'tool_used', 'form_url', 'method', 'transform', 'trimmed_markers', 'selected_chr', 'chromosomes', 'mapping_scale', 'sample_vals', 'score_type', 'suggestive', 'significant', 'num_perm', 'permCheck', 'perm_output', 'perm_strata', 'categorical_vars', 'num_bootstrap', 'bootCheck', 'bootstrap_results', - 'LRSCheck', 'covariates', 'maf', 'use_loco', 'manhattan_plot', 'control_marker', 'control_marker_db', 'do_control', 'genofile', + 'LRSCheck', 'covariates', 'maf', 'use_loco', 'manhattan_plot', 'control_marker', 'do_control', 'genofile', 'pair_scan', 'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck', 'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck', 'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno', 'temp_trait', 'group', 'species', 'reaper_version', 'primary_samples'] -$(".rqtl-tab, #rqtl_geno_compute").on("click", (function(_this) { +$(".rqtl-geno-tab, #rqtl_geno_compute").on("click", (function(_this) { return function() { if ($(this).hasClass('active') || $(this).attr('id') == "rqtl_geno_compute"){ var form_data, url; @@ -175,7 +175,6 @@ $(".gemma-tab, #gemma_compute").on("click", (function(_this) { return function() { if ($(this).hasClass('active') || $(this).attr('id') == "gemma_compute"){ var form_data, url; - console.log("RUNNING GEMMA"); url = "/loading"; $('input[name=method]').val("gemma"); $('input[name=selected_chr]').val($('#chr_gemma').val()); diff --git a/wqflask/wqflask/submit_bnw.py b/wqflask/wqflask/submit_bnw.py index a0e84c8c..b21a88cc 100644 --- a/wqflask/wqflask/submit_bnw.py +++ b/wqflask/wqflask/submit_bnw.py @@ -3,7 +3,8 @@ from base import data_set from utility import helper_functions import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) + def get_bnw_input(start_vars): logger.debug("BNW VARS:", start_vars) diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index ccb2ac5a..ddb1d272 100644 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -69,6 +69,7 @@ <ul class="dropdown-menu"> <li><a href="{{ url_for('references_blueprint.references') }}">References</a></li> <li><a href="/tutorials">Tutorials/Primers</a></li> + <li><a href="{{ url_for('blogs_blueprint.blogs_list') }}">Blogs</a></li> <li><a href="{{ url_for('glossary_blueprint.glossary') }}">Glossary of Term</a></li> <li><a href="http://gn1.genenetwork.org/faq.html">FAQ</a></li> <li><a href="{{ url_for('policies_blueprint.policies') }}">Policies</a></li> @@ -191,7 +192,7 @@ Translational Systems Genetics of Mitochondria, Metabolism, and Aging (R01AG043930, 2013-2018) </li> <li> - <a href="http://www.iniastress.org/">NIAAA</a> + <a href="https://www.ohsu.edu/iniastress-consortium">NIAAA</a> Integrative Neuroscience Initiative on Alcoholism (U01 AA016662, U01 AA013499, U24 AA013513, U01 AA014425, 2006-2017) </li> <li> @@ -257,7 +258,6 @@ <script src="{{ url_for('js', filename='jquery-ui/jquery-ui.min.js') }}" type="text/javascript"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='colorbox/jquery.colorbox-min.js') }}"></script> - <!--<script type="text/javascript" src="/static/new/javascript/login.js"></script>--> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/parsley.min.js') }}"></script> diff --git a/wqflask/wqflask/templates/blogs.html b/wqflask/wqflask/templates/blogs.html new file mode 100644 index 00000000..314eb733 --- /dev/null +++ b/wqflask/wqflask/templates/blogs.html @@ -0,0 +1,12 @@ +{% extends "base.html" %} +{% block title %}Blogs {% endblock %} +{% block css %} +<link rel="stylesheet" type="text/css" href="/static/new/css/markdown.css" /> +{% endblock %} +{% block content %} +<div class="github-btn-container"> +</div> +<div id="markdown" class="container"> + {{ rendered_markdown|safe }} +</div> +{% endblock %}
\ No newline at end of file diff --git a/wqflask/wqflask/templates/blogs_list.html b/wqflask/wqflask/templates/blogs_list.html new file mode 100644 index 00000000..6bad4628 --- /dev/null +++ b/wqflask/wqflask/templates/blogs_list.html @@ -0,0 +1,52 @@ +{% extends "base.html" %} +{% block title %}Blogs {% endblock %} +{% block css %} +<style type="text/css"> +.container { + height: 100vh; +} + +.blog_year { + font-weight: bold; + font-size: 48px; + padding: 12px 10px; +} + +.blog_title { + padding: 10px 15px; + +} + + + +.blog_title a { + font-size: 1.3em; + padding-left: 10px; + letter-spacing: 0.07em; + text-decoration: underline; +} +</style> +<link rel="stylesheet" type="text/css" href="/static/new/css/markdown.css" /> +{% endblock %} +{% block content %} +<div id="markdown" class="container"> + <div> + {% for year, year_blogs in blogs.items() %} + <div class="blog_year"> + <h3>{{year}}</h3> + </div> + {%for blog in year_blogs%} + <div> + <div class="blog_title"> + <ul> + <li> + <a href="{{ url_for('blogs_blueprint.display_blog',blog_path = blog.full_path)}}">{{blog['subtitle']}}</a> + </li> + </ul> + </div> + </div> + {% endfor %} + {%endfor%} + </div> +</div> +{% endblock %}
\ No newline at end of file diff --git a/wqflask/wqflask/templates/collections/add.html b/wqflask/wqflask/templates/collections/add.html index b4e5385b..0398c6e4 100644 --- a/wqflask/wqflask/templates/collections/add.html +++ b/wqflask/wqflask/templates/collections/add.html @@ -49,8 +49,20 @@ </div> <script> - $('#add_form').parsley(); - $('#add_form').on('submit', function(){ - parent.jQuery.colorbox.close(); - }); + $('#add_form').parsley(); + $('#add_form').on('submit', function(){ + parent.jQuery.colorbox.close(); + }); + + apply_default = function() { + let default_collection_id = $.cookie('default_collection'); + if (default_collection_id) { + let the_option = $('[name=existing_collection] option').filter(function() { + return ($(this).val().split(":")[0] == default_collection_id); + }) + the_option.prop('selected', true); + } + } + + apply_default(); </script> diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html index 8d5e3616..9ec98ab1 100644 --- a/wqflask/wqflask/templates/collections/view.html +++ b/wqflask/wqflask/templates/collections/view.html @@ -14,6 +14,7 @@ <span id="collection_name">{{ uc.name }}</span> <input type="text" name="new_collection_name" style="font-size: 20px; display: none; width: 500px;" class="form-control" placeholder="{{ uc.name }}"> <button class="btn btn-default" style="display: inline;" id="change_collection_name">Change Collection Name</button> + <button class="btn btn-default" style="display: inline;" id="make_default">Make Default</button> </h1> <h3>This collection has {{ '{}'.format(numify(trait_obs|count, "record", "records")) }}</h3> @@ -231,6 +232,21 @@ $('#collection_name').css('display', 'inline'); } }); + + make_default = function() { + alert("The current collection is now your default collection.") + let uc_id = $('#uc_id').val(); + $.cookie('default_collection', uc_id, { + expires: 365, + path: '/' + }); + + let default_collection_id = $.cookie('default_collection'); + }; + + $("#make_default").on("click", function(){ + make_default(); + }); }); </script> diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index a9a3e1a0..4cad2749 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -4,7 +4,7 @@ <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='DataTables/css/jquery.dataTables.css') }}" /> <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css') }}" /> <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css') }}"> - <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css"> + <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='fontawesome/css/all.min.css') }}"/> <link rel="stylesheet" type="text/css" href="/static/new/css/trait_list.css" /> <link rel="stylesheet" type="text/css" href="/static/new/css/show_trait.css" /> {% endblock %} @@ -141,7 +141,7 @@ <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/buttons.html5.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> - <script language="javascript" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/js/all.min.js"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='fontawesome/js/all.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> diff --git a/wqflask/wqflask/templates/edit_trait.html b/wqflask/wqflask/templates/edit_trait.html new file mode 100644 index 00000000..7d4c65f8 --- /dev/null +++ b/wqflask/wqflask/templates/edit_trait.html @@ -0,0 +1,234 @@ +{% extends "base.html" %} +{% block title %}Trait Submission{% endblock %} +{% block content %} +<!-- Start of body --> +Edit Trait for Published Database +Submit Trait | Reset + +{% if diff %} + +<div class="container"> + <details class="col-sm-12 col-md-10 col-lg-12"> + <summary> + <h2>Update History</h2> + </summary> + <table class="table"> + <tbody> + <tr> + <th>Timestamp</th> + <th>Editor</th> + <th>Field</th> + <th>Diff</th> + </tr> + {% set ns = namespace(display_cell=True) %} + + {% for timestamp, group in diff %} + {% set ns.display_cell = True %} + {% for i in group %} + <tr> + {% if ns.display_cell and i.timestamp == timestamp %} + + {% set author = i.author %} + {% set timestamp_ = i.timestamp %} + + {% else %} + + {% set author = "" %} + {% set timestamp_ = "" %} + + {% endif %} + <td>{{ timestamp_ }}</td> + <td>{{ author }}</td> + <td>{{ i.diff.field }}</td> + <td><pre>{{ i.diff.diff }}</pre></td> + {% set ns.display_cell = False %} + </tr> + {% endfor %} + {% endfor %} + </tbody> + </table> + </details> + +</div> + +{% endif %} + +<form id="edit-form" class="form-horizontal" method="post" action="/trait/update"> + <h2 class="text-center">Trait Information:</h2> + <div class="form-group"> + <label for="pubmed-id" class="col-sm-2 control-label">Pubmed ID:</label> + <!-- Do not enter PubMed_ID if this trait has not been Published. + If the PubMed_ID you entered is alreday stored in our + database, all the following fields except Post Publication + Description will be ignored. Do not enter any non-digit + character in this field. --> + <div class="col-sm-8"> + <textarea name="pubmed-id" class="form-control" rows="1">{{ publish_xref.publication_id |default('', true) }}</textarea> + <input name="old_id_" class="changed" type="hidden" value="{{ publish_xref.publication_id |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="pre-pub-desc" class="col-sm-2 control-label">Pre Publication Description:</label> + <div class="col-sm-8"> + <textarea name="pre-pub-desc" class="form-control" rows="4">{{ phenotype.pre_pub_description |default('', true) }}</textarea> + <input name="old_pre_pub_description" class="changed" type="hidden" value="{{ phenotype.pre_pub_description |default('', true) }}"/> + </div> + <!-- If the PubMed ID is entered, the Post Publication Description + will be shown to all users. If there is no PubMed ID, and the + Pre Publication Description is entered, only you and + authorized users can see the Post Publication Description --> + </div> + <div class="form-group"> + <label for="post-pub-desc" class="col-sm-2 control-label">Post Publication Description:</label> + <div class="col-sm-8"> + <textarea name="post-pub-desc" class="form-control" rows="4">{{ phenotype.post_pub_description |default('', true) }}</textarea> + <input name="old_post_pub_description" class="changed" type="hidden" value="{{ phenotype.post_pub_description |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="orig-desc" class="col-sm-2 control-label">Original Description:</label> + <div class="col-sm-8"> + <textarea name="orig-desc" class="form-control" rows="4">{{ phenotype.original_description |default('', true) }}</textarea> + <input name="old_original_description" class="changed" type="hidden" value="{{ phenotype.original_description |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="units" class="col-sm-2 control-label">Units:</label> + <div class="col-sm-8"> + <textarea name="units" class="form-control" rows="1">{{ phenotype.units |default('', true) }}</textarea> + <input name="old_units" class="changed" type="hidden" value="{{ phenotype.units |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="pre-pub-abbrev" class="col-sm-2 control-label"> + Pre Publication Abbreviation: + </label> + <div class="col-sm-8"> + <textarea name="pre-pub-abbrev" class="form-control" rows="1">{{ phenotype.pre_pub_abbreviation |default('', true) }}</textarea> + <input name="old_pre_pub_abbreviation" class="changed" type="hidden" value="{{ phenotype.pre_pub_abbreviation |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="post-pub-abbrev" class="col-sm-2 control-label"> + Post Publication Abbreviation: + </label> + <div class="col-sm-8"> + <textarea name="post-pub-abbrev" class="form-control" rows="1">{{ phenotype.post_pub_abbreviation |default('', true) }}</textarea> + <input name="old_post_pub_abbreviation" class="changed" type="hidden" value="{{ phenotype.post_pub_abbreviation |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="labcode" class="col-sm-2 control-label"> + Lab Code: + </label> + <div class="col-sm-8"> + <textarea name="labcode" class="form-control" rows="1">{{ phenotype.lab_code |default('', true) }}</textarea> + <input name="old_lab_code" class="changed" type="hidden" value="{{ phenotype.lab_code |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="submitter" class="col-sm-2 control-label"> + Submitter: + </label> + <div class="col-sm-8"> + <textarea name="submitter" class="form-control" rows="1">{{ phenotype.submitter |default('', true) }}</textarea> + <input name="old_submitter" class="changed" type="hidden" value="{{ phenotype.submitter |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="owner" class="col-sm-2 control-label">Owner:</label> + <div class="col-sm-8"> + <textarea name="owner" class="form-control" rows="1">{{ phenotype.owner |default('', true) }}</textarea> + <input name="old_owner" class="changed" type="hidden" value="{{ phenotype.owner |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="authorized-users" class="col-sm-2 control-label"> + Authorized Users: + </label> + <div class="col-sm-8"> + <textarea name="authorized-users" class="form-control" rows="1">{{ phenotype.authorized_users |default('', true) }}</textarea> + <input name="old_authorized_users" class="changed" type="hidden" value="{{ phenotype.authorized_users |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="authors" class="col-sm-2 control-label">Authors:</label> + <div class="col-sm-8"> + <textarea name="authors" class="form-control" rows="2">{{ publication.authors |default('', true) }}</textarea> + <input name="old_authors" class="changed" type="hidden" value="{{ publication.authors |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="title" class="col-sm-2 control-label">Title:</label> + <div class="col-sm-8"> + <textarea name="title" class="form-control" rows="2">{{ publication.title |default('', true) }}</textarea> + <input name="old_title" class="changed" type="hidden" value="{{ publication.title |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="abstract" class="col-sm-2 control-label">Abstract:</label> + <div class="col-sm-8"> + <textarea name="abstract" class="form-control" rows="6">{{ publication.abstract |default('', true) }}</textarea> + <input name="old_abstract" class="changed" type="hidden" value="{{ publication.abstract |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="journal" class="col-sm-2 control-label">Journal:</label> + <div class="col-sm-8"> + <textarea name="journal" class="form-control" rows="1">{{ publication.journal |default('', true) }}</textarea> + <input name="old_journal" class="changed" type="hidden" value="{{ publication.journal_ |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="pages" class="col-sm-2 control-label">Pages:</label> + <div class="col-sm-8"> + <textarea name="pages" class="form-control" rows="1">{{ publication.pages |default('', true) }}</textarea> + <input name="old_pages" class="changed" type="hidden" value="{{ publication.pages |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="month" class="col-sm-2 control-label">Month:</label> + <div class="col-sm-8"> + <textarea name="month" class="form-control" rows="1">{{ publication.month |default('', true) }}</textarea> + <input name="old_month" class="changed" type="hidden" value="{{ publication.month |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="year" class="col-sm-2 control-label">Year:</label> + <div class="col-sm-8"> + <textarea name="year" class="form-control" rows="1">{{ publication.year |default('', true) }}</textarea> + <input name="old_year" class="changed" type="hidden" value="{{ publication.year |default('', true) }}"/> + </div> + </div> + <div class="form-group"> + <label for="sequence" class="col-sm-2 control-label">Sequence:</label> + <div class="col-sm-8"> + <textarea name="sequence" class="form-control" rows="6">{{ publish_xref.sequence |default('', true) }}</textarea> + <input name="old_sequence" class="changed" type="hidden" value="{{ publication.sequence |default('', true) }}"/> + </div> + </div> + <div class="controls" style="display:block; margin-left: 40%; margin-right: 20%;"> + <input name="dataset-name" class="changed" type="hidden" value="{{ publish_xref.id_ }}"/> + <input name="phenotype-id" class="changed" type="hidden" value="{{ publish_xref.phenotype_id }}"/> + <input name="inbred-set-id" class="changed" type="hidden" value="{{ publish_xref.inbred_set_id }}"/> + <input name="comments" class="changed" type="hidden" value="{{ publish_xref.comments }}"/> + <input type="submit" style="width: 125px; margin-right: 25px;" class="btn btn-primary form-control col-xs-2 changed" value="Submit Change"> + <input type="reset" style="width: 110px;" class="btn btn-primary form-control col-xs-2 changed" onClick="window.location.reload();" value="Reset"> + </div> +</form> + +{%endblock%} + +{% block js %} +<script> + gn_server_url = "{{ gn_server_url }}"; + function MarkAsChanged(){ + $(this).addClass("changed"); + } + $(":input").blur(MarkAsChanged).change(MarkAsChanged); + + $("input[type=submit]").click(function(){ + $(":input:not(.changed)").attr("disabled", "disabled"); + }); +</script> +{% endblock %} diff --git a/wqflask/wqflask/templates/gsearch_gene.html b/wqflask/wqflask/templates/gsearch_gene.html index 6fd0abe8..5549ac8a 100644 --- a/wqflask/wqflask/templates/gsearch_gene.html +++ b/wqflask/wqflask/templates/gsearch_gene.html @@ -7,7 +7,7 @@ {% block content %} <!-- Start of body --> - <div class="container" style="width: 2000px;"> + <div class="container"> <h3>GN searched for the term(s) <b>"{{ terms }}"</b> in 754 datasets and 39,765,944 traits across 10 species<br/> and found <b>{{ trait_count }}</b> results that match your query.<br/> @@ -31,7 +31,7 @@ </form> <br /> <br /> - <div style="width: 100%;"> + <div style="min-width: 2000px; width: 100%;"> <table id="trait_table" class="table-hover table-striped cell-border" style="float: left;"> <tbody> <td colspan="100%" align="center"><br><b><font size="15">Loading...</font></b><br></td> @@ -48,6 +48,7 @@ {% block js %} <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/md5.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='jszip/jszip.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/colReorder/js/dataTables.colReorder.js') }}"></script> @@ -55,7 +56,19 @@ <script language="javascript" type="text/javascript" src="/static/new/javascript/search_results.js"></script> <script type='text/javascript'> - var the_rows = {{ trait_list|safe }}; + var getParams = function(url) { + let parser = document.createElement('a'); + parser.href = url; + let params = parser.search.substring(1); + if(params.length > 0) { + return ('?'+params); + } + return params; + }; + </script> + + <script type='text/javascript'> + var trait_list = {{ trait_list|safe }}; </script> <script type="text/javascript" charset="utf-8"> @@ -94,8 +107,7 @@ } } - console.time("Creating table"); - $('#trait_table').DataTable( { + var the_table = $('#trait_table').DataTable( { 'drawCallback': function( settings ) { $('#trait_table tr').click(function(event) { if (event.target.type !== 'checkbox' && event.target.tagName.toLowerCase() !== 'a') { @@ -105,26 +117,26 @@ $('.trait_checkbox:checkbox').on("change", change_buttons); }, 'createdRow': function ( row, data, index ) { - $('td', row).eq(0).attr("style", "text-align: center; padding: 4px 10px 2px 10px;"); + $('td', row).eq(0).attr("style", "text-align: center; padding: 0px 10px 2px 13px;"); $('td', row).eq(1).attr("align", "right"); $('td', row).eq(4).attr('title', $('td', row).eq(4).text()); - if ($('td', row).eq(4).text().length > 20) { - $('td', row).eq(4).text($('td', row).eq(4).text().substring(0, 20)); + if ($('td', row).eq(4).text().length > 30) { + $('td', row).eq(4).text($('td', row).eq(4).text().substring(0, 30)); $('td', row).eq(4).text($('td', row).eq(4).text() + '...') } $('td', row).eq(5).attr('title', $('td', row).eq(5).text()); - if ($('td', row).eq(5).text().length > 20) { - $('td', row).eq(5).text($('td', row).eq(5).text().substring(0, 20)); + if ($('td', row).eq(5).text().length > 35) { + $('td', row).eq(5).text($('td', row).eq(5).text().substring(0, 35)); $('td', row).eq(5).text($('td', row).eq(5).text() + '...') } $('td', row).eq(6).attr('title', $('td', row).eq(6).text()); - if ($('td', row).eq(6).text().length > 35) { - $('td', row).eq(6).text($('td', row).eq(6).text().substring(0, 35)); + if ($('td', row).eq(6).text().length > 60) { + $('td', row).eq(6).text($('td', row).eq(6).text().substring(0, 60)); $('td', row).eq(6).text($('td', row).eq(6).text() + '...') } $('td', row).eq(8).attr('title', $('td', row).eq(8).text()); - if ($('td', row).eq(8).text().length > 45) { - $('td', row).eq(8).text($('td', row).eq(8).text().substring(0, 45)); + if ($('td', row).eq(8).text().length > 60) { + $('td', row).eq(8).text($('td', row).eq(8).text().substring(0, 60)); $('td', row).eq(8).text($('td', row).eq(8).text() + '...') } $('td', row).slice(10,14).attr("align", "right"); @@ -142,11 +154,12 @@ $('td', row).eq(12).attr('data-export', $('td', row).eq(12).text()); $('td', row).eq(13).attr('data-export', $('td', row).eq(13).text()); }, - 'data': the_rows, + 'data': trait_list, 'columns': [ - { - 'data': null, + { 'orderDataType': "dom-checkbox", + 'width': "10px", + 'data': null, 'render': function(data, type, row, meta) { return '<input type="checkbox" name="searchResult" class="trait_checkbox checkbox" value="' + data.hmac + '">' } @@ -154,30 +167,35 @@ { 'title': "Index", 'type': "natural", + 'width': "30px", 'data': "index" }, { 'title': "Record", 'type': "natural", - 'data': null, 'orderDataType': "dom-inner-text", + 'width': "60px", + 'data': null, 'render': function(data, type, row, meta) { return '<a target="_blank" href="/show_trait?trait_id=' + data.name + '&dataset=' + data.dataset + '">' + data.name + '</a>' } }, { 'title': "Species", - 'type': "natural", + 'type': "natural", + 'width': "60px", 'data': "species" }, { 'title': "Group", 'type': "natural", + 'width': "150px", 'data': "group" }, { 'title': "Tissue", 'type': "natural", + 'width': "150px", 'data': "tissue" }, { @@ -188,6 +206,7 @@ { 'title': "Symbol", 'type': "natural", + 'width': "60px", 'data': "symbol" }, { @@ -205,25 +224,27 @@ { 'title': "Location", 'type': "natural-minus-na", - 'width': "100px", + 'width': "125px", 'data': "location_repr" }, { 'title': "Mean", 'type': "natural-minus-na", - 'data': "mean", - 'orderSequence': [ "desc", "asc"] + 'orderSequence': [ "desc", "asc"], + 'width': "30px", + 'data': "mean" }, { 'title': "Max<br>LRS<a href=\"{{ url_for('glossary_blueprint.glossary') }}#LRS\" target=\"_blank\" style=\"color: white;\"><sup>?</sup></a>", 'type': "natural-minus-na", + 'width': "60px", 'data': "LRS_score_repr", 'orderSequence': [ "desc", "asc"] }, { - 'title': "Max LRS<br>Location", + 'title': "Max LRS Location", 'type': "natural-minus-na", - 'width': "100px", + 'width': "125px", 'data': "max_lrs_text" }, { @@ -234,18 +255,18 @@ 'orderSequence': [ "desc", "asc"] } ], - 'order': [[1, "asc" ]], - 'sDom': "pitirp", - 'autoWidth': true, - 'iDisplayLength': 500, - 'deferRender': false, - 'paging': true, - 'orderClasses': true, + "order": [[1, "asc" ]], + 'sDom': "iti", + "autoWidth": true, + "bSortClasses": false, 'processing': true, - 'language': { - 'loadingRecords': ' ', - 'processing': 'Loading...' - } + {% if trait_count > 20 %} + "scrollY": "100vh", + "scroller": true, + "scrollCollapse": true + {% else %} + "iDisplayLength": -1 + {% endif %} } ); $('#trait_table').append( @@ -269,8 +290,7 @@ '</tfoot>' ); - console.timeEnd("Creating table"); - + the_table.draw(); }); </script> diff --git a/wqflask/wqflask/templates/gsearch_pheno.html b/wqflask/wqflask/templates/gsearch_pheno.html index 987b51a7..89316cbc 100644 --- a/wqflask/wqflask/templates/gsearch_pheno.html +++ b/wqflask/wqflask/templates/gsearch_pheno.html @@ -48,6 +48,7 @@ {% block js %} <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/md5.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='jszip/jszip.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/colReorder/js/dataTables.colReorder.js') }}"></script> @@ -55,7 +56,19 @@ <script language="javascript" type="text/javascript" src="/static/new/javascript/search_results.js"></script> <script type='text/javascript'> - var the_rows = {{ trait_list|safe }}; + var getParams = function(url) { + let parser = document.createElement('a'); + parser.href = url; + let params = parser.search.substring(1); + if(params.length > 0) { + return ('?'+params); + } + return params; + }; + </script> + + <script type='text/javascript'> + var trait_list = {{ trait_list|safe }}; </script> <script type="text/javascript" charset="utf-8"> @@ -94,8 +107,7 @@ } } - console.time("Creating table"); - $('#trait_table').DataTable( { + var the_table = $('#trait_table').DataTable( { 'drawCallback': function( settings ) { $('#trait_table tr').click(function(event) { if (event.target.type !== 'checkbox' && event.target.tagName.toLowerCase() !== 'a') { @@ -130,11 +142,12 @@ $('td', row).eq(9).attr('data-export', $('td', row).eq(9).text()); $('td', row).eq(10).attr('data-export', $('td', row).eq(10).text()); }, - 'data': the_rows, + 'data': trait_list, 'columns': [ { 'data': null, 'orderDataType': "dom-checkbox", + 'width': "10px", 'render': function(data, type, row, meta) { return '<input type="checkbox" name="searchResult" class="trait_checkbox checkbox" value="' + data.hmac + '">' } @@ -142,23 +155,26 @@ { 'title': "Index", 'type': "natural", + 'width': "30px", 'data': "index" }, { 'title': "Species", 'type': "natural", + 'width': "60px", 'data': "species" }, { 'title': "Group", 'type': "natural", - 'width': "10%", + 'width': "100px", 'data': "group" }, { 'title': "Record", 'type': "natural", 'data': null, + 'width': "60px", 'orderDataType': "dom-inner-text", 'render': function(data, type, row, meta) { return '<a target="_blank" href="/show_trait?trait_id=' + data.name + '&dataset=' + data.dataset + '">' + data.display_name + '</a>' @@ -167,7 +183,7 @@ { 'title': "Description", 'type': "natural", - 'width': "25%", + 'width': "500px", 'data': null, 'render': function(data, type, row, meta) { try { @@ -180,13 +196,13 @@ { 'title': "Mean", 'type': "natural-minus-na", - 'width': "10%", + 'width': "30px", 'data': "mean" }, { 'title': "Authors", 'type': "natural", - 'width': "25%", + 'width': "300px", 'data': null, 'render': function(data, type, row, meta) { author_list = data.authors.split(",") @@ -195,13 +211,7 @@ } else{ author_string = data.authors } - - try { - return decodeURIComponent(escape(author_string)) - } catch(err) { - return author_string - } - + return author_string } }, { @@ -209,6 +219,7 @@ 'type': "natural-minus-na", 'data': null, 'orderDataType': "dom-inner-text", + 'width': "25px", 'render': function(data, type, row, meta) { if (data.pubmed_id != "N/A"){ return '<a href="' + data.pubmed_link + '">' + data.pubmed_text + '</a>' @@ -222,33 +233,35 @@ 'title': "Max LRS<a href=\"{{ url_for('glossary_blueprint.glossary') }}#LRS\" target=\"_blank\" style=\"color: white;\"><sup>?</sup></a>", 'type': "natural-minus-na", 'data': "LRS_score_repr", + 'width': "60px", 'orderSequence': [ "desc", "asc"] }, { 'title': "Max LRS Location", 'type': "natural-minus-na", - 'width': "10%", + 'width': "125px", 'data': "max_lrs_text" }, { 'title': "Additive Effect<a href=\"{{ url_for('glossary_blueprint.glossary') }}#A\" target=\"_blank\" style=\"color: white;\"><sup>?</sup></a>", 'type': "natural-minus-na", 'data': "additive", + 'width': "60px", 'orderSequence': [ "desc", "asc"] } ], - 'order': [[1, "asc" ]], - 'sDom': "pitirp", - 'autoWidth': false, - 'deferRender': false, - 'iDisplayLength': 500, - 'paging': true, - 'orderClasses': true, + "order": [[1, "asc" ]], + 'sDom': "iti", + "autoWidth": true, + "bSortClasses": false, 'processing': true, - 'language': { - 'loadingRecords': ' ', - 'processing': 'Loading...' - } + {% if trait_count > 20 %} + "scrollY": "100vh", + "scroller": true, + "scrollCollapse": true + {% else %} + "iDisplayLength": -1 + {% endif %} } ); $('#trait_table').append( @@ -269,7 +282,7 @@ '</tfoot>' ); - console.timeEnd("Creating table"); + the_table.draw(); }); </script> diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 31846f87..7b103305 100644..100755 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -1,88 +1,102 @@ {% extends "base.html" %} {% block title %}GeneNetwork{% endblock %} -{% block content %} -<!-- Start of body --> +{% block css %} +<style TYPE="text/css"> + p.interact { display: none; } -<!-- - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>GeneNetwork</h1> - <p class="lead">Open source bioinformatics for systems genetics</p> - </div> - </header> ---> - <div class="container-fluid"> + + + .tweet { + + padding:5px; + color:#000; + + } + + .media { + padding-bottom:10px; + border-bottom: 1px solid #c8ccc9; + } + + .media img { + + width: 95%; + height: 100%; + border-radius: 5px; + /*transform: scale(1.1); image small?*/ + border:1px solid #c8ccc9; + + } +</style> +{% endblock %} +{% block content %} +<!-- Start of body --> + <div class="container-fluid" style="min-width: 1210px;"> {{ flash_me() }} - <div class="row" style="width: 1400px !important;"> + <div class="row" style="width: 100%;"> - <div class="col-xs-5"> + <div class="col-xs-4" style="margin-right:50px; min-width: 530px; max-width: 550px;"> <section id="search"> - <div class="page-header"> + <div> <h1>Select and search</h1> </div> - <form method="get" action="/search" name="SEARCHFORM"> + <form method="get" action="/search" target="_blank" name="SEARCHFORM"> <fieldset> <div style="padding: 20px" class="form-horizontal"> <div class="form-group"> <label for="species" class="col-xs-1 control-label" style="width: 65px !important;">Species:</label> - <div class="col-xs-10 controls input-append" style="padding-right: 0px;"> - <div class="col-xs-8"> - <select name="species" id="species" class="form-control span3" style="width: 280px !important;"></select> - </div> - <div class="col-xs-4"> - <button type="button" id="make_default" class="btn btn-primary form-control">Make Default</button> + <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> + <div class="col-8"> + <select name="species" id="species" class="form-control" style="width: 280px !important;"><option>Loading...</option></select> </div> </div> </div> <div class="form-group"> <label for="group" class="col-xs-1 control-label" style="width: 65px !important;">Group:</label> - <div class="col-xs-10 controls input-append"> - <div class="col-xs-8"> - <select name="group" id="group" class="form-control span3" style="width: 280px !important;"></select> + <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> + <div class="col-9"> + <select name="group" id="group" class="form-control" style="width: 340px !important;"><option>Loading...</option></select> <i class="icon-question-sign"></i> </div> + <div class="col-3" style="margin-left: 10px;"> + <button type="button" id="group_info" class="btn btn-primary form-control" style="width: 50px !important;">Info</button> + </div> </div> </div> <div class="form-group"> <label for="tissue" class="col-xs-1 control-label" style="width: 65px !important;">Type:</label> - <div class="col-xs-10 controls"> - <div class="col-xs-8"> - <select name="type" id="type" class="form-control span3" style="width: 280px !important;"></select> + <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> + <div class="col-9"> + <select name="type" id="type" class="form-control" style="width: 340px !important;"><option>Loading...</option></select> + </div> + <div class="col-3" style="margin-left: 10px;"> + <button type="button" id="dataset_info" class="btn btn-primary form-control" style="width: 50px !important;">Info</button> </div> </div> </div> <div class="form-group"> <label for="dataset" class="col-xs-1 control-label" style="width: 65px !important;">Dataset:</label> - <div class="col-xs-10 controls input-append"> - <div class="col-xs-10"> - <select name="dataset" id="dataset" class="form-control span5" style="width: 340px !important;"></select> + <div class="col-xs-10 controls" style="display: flex; padding-left: 20px;"> + <div class="col-9"> + <select name="dataset" id="dataset" class="form-control" style="max-width: 550px; width: 450px !important;"><option>Loading...</option></select> <i class="icon-question-sign"></i> </div> - <div class="col-xs-2"> - <button type="button" id="dataset_info" class="btn btn-primary form-control" style="width: 75px !important;">Info</button> - </div> </div> </div> - - <!-- USER HELP --> - <!--<p>Databases marked with <b>**</b>--> - <!-- suffix are not public yet.<br>--> - <!-- Access requires <a href="/account.html" target=--> - <!-- "_blank" class="fs14">user login</a>.</p>--> <!-- GET ANY SEARCH --> <div class="form-group"> <label for="or_search" class="col-xs-1 control-label" style="padding-left: 0px; padding-right: 0px; width: 65px !important;">Get Any:</label> - <div class="col-xs-10 controls"> - <div class="col-xs-8"> - <textarea onkeydown="pressed(event)" name="search_terms_or" rows="1" class="form-control search-query" style="max-width: 550px; width: 450px !important;" id="or_search"></textarea> + <div class="col-xs-10 controls" style="padding-left: 20px;"> + <div class="col-8"> + <textarea onkeydown="pressed(event)" name="search_terms_or" rows="1" class="form-control search-query" style="resize: vertical; max-width: 550px; width: 450px !important;" id="or_search"></textarea> </div> </div> </div> @@ -90,8 +104,8 @@ <!-- GET ANY HELP --> <div class="form-group"> <label for="btsearch" class="col-xs-1 control-label" style="width: 65px !important;"></label> - <div class="col-xs-10 controls"> - <div class="col-xs-12 controls"> + <div class="col-xs-10 controls" style="padding-left: 20px;"> + <div class="col-12 controls"> Enter terms, genes, ID numbers in the <b>Search</b> field.<br> Use <b>*</b> or <b>?</b> wildcards (Cyp*a?, synap*).<br> Use <b>quotes</b> for terms such as <i>"tyrosine kinase"</i>. @@ -101,29 +115,30 @@ <div class="form-group"> <label for="and_search" class="col-xs-1 control-label" style="padding-left: 0px; padding-right: 0px; width: 65px !important;">Combined:</label> - <div class="col-xs-10 controls"> - <div class="col-xs-8"> - <textarea onkeydown="pressed(event)" name="search_terms_and" rows="1" class="form-control search-query" style="max-width: 550px; width: 450px !important;" id="and_search"></textarea> + <div class="col-xs-10 controls" style="padding-left: 20px;"> + <div class="col-8"> + <textarea onkeydown="pressed(event)" name="search_terms_and" rows="1" class="form-control search-query" style="resize: vertical; max-width: 550px; width: 450px !important;" id="and_search"></textarea> </div> </div> </div> <div class="form-group"> <label for="btsearch" class="col-xs-1 control-label" style="width: 65px !important;"></label> - <div class="col-xs-10 controls"> - <div class="col-xs-2 controls" style="width: 100px !important;"> + <div class="col-xs-10 controls" style="display: flex; padding-left: 20px;"> + <div class="col-2 controls"> <input id="btsearch" type="submit" class="btn btn-primary form-control" value="Search"> </div> + <div class="col-2 controls" style="padding-left: 20px;"> + <button type="button" id="make_default" class="btn btn-primary form-control">Make Default</button> + </div> </div> </div> <!-- SEARCH, MAKE DEFAULT --> - <div class="form-group"> </div> <input type="hidden" name="FormID" value="searchResult" class="form-control"> - <!--!<input type="hidden" name="RISet" value="BXD">--> </div> </fieldset> </form> @@ -133,33 +148,43 @@ <h2>Advanced commands</h2> </div> - <p>You can also use advanced commands. Copy these simple examples into the Get Any or Combined search fields:</p> + <p>You can also use advanced commands. Copy these simple examples into the Get Any field for single term searches and Combined for searches with multiple terms:</p> <ul> <li><b>POSITION=(chr1 25 30)</b> finds genes, markers, or transcripts on chromosome 1 between 25 and 30 Mb.</li> - <li><b>MEAN=(15 16) LRS=(23 46)</b> in the <b>Combined</b> field finds - highly expressed genes (15 to 16 log2 units) AND with peak <a href="{{ url_for('glossary_blueprint.glossary') }}#L">LRS</a> - linkage between 23 and 46.</li> + <li><b>MEAN=(15 16)</b> in the <b>Combined</b> field finds + highly expressed genes (15 to 16 log2 units)</li> <li><b>RANGE=(1.5 2.5)</b> in the <b>Any</b> field finds traits with values with a specified fold-range (minimum = 1). Useful for finding "housekeeping genes" <b>(1.0 1.2)</b> or highly variable molecular assays <b>(10 100)</b>.</li> + <li><b>LRS=(15 1000)</b> or <b>LOD=(2 8)</b> finds all traits with peak LRS or LOD scores between lower and upper limits.</li> + + <li><b>LRS=(9 999 Chr4 122 155)</b> finds all traits on Chr 4 from 122 and 155 Mb with LRS scores between 9 and 999.</li> + + <li><b>cisLRS=(15 1000 5)</b> or <b>cisLOD=(2 8 5)</b> finds all cis eQTLs with peak LRS or LOD scores between lower and upper limits, + with an <b>inclusion</b> zone of 5 Mb around the parent gene.</li> + + <li><b>transLRS=(15 1000 5)</b> or <b>transLOD=(2 8 5)</b> finds all trans eQTLs with peak LRS or LOD scores between lower and upper limits, + with an <b>exclusion</b> zone of 5 Mb around the parent gene. You can also add a fourth term specifying which chromosome you want the transLRS to be on + (for example transLRS=(15 1000 5 7) would find all trans eQTLs with peak LRS on chromosome 7 that is also a trans eQTL with exclusionary zone of 5Mb).</li> + + <li><b>POSITION=(Chr4 122 130) cisLRS=(9 999 10)</b> + finds all traits on Chr 4 from 122 and 155 Mb with cisLRS scores + between 9 and 999 and an inclusion zone of 10 Mb.</li> + <li><b>RIF=mitochondrial</b> searches RNA databases for <a href="https://en.wikipedia.org/wiki/GeneRIF"> GeneRIF</a> links.</li> - <li><b>WIKI=nicotine</b> searches <a href="http://www.genenetwork.org/webqtl/main.py?FormID=geneWiki"> + <li><b>WIKI=nicotine</b> searches <a href="http://gn1.genenetwork.org/webqtl/main.py?FormID=geneWiki"> GeneWiki</a> for genes that you or other users have annotated with the word <i>nicotine</i>.</li> <li><b>GO:0045202</b> searches for synapse-associated genes listed in the - <a href="http://amigo.geneontology.org/amigo/medial_search?q=GO%3A0045202">Gene Ontology</a>.</li> - - <li><b>GO:0045202 LRS=(9 99 Chr4 122 155) cisLRS=(9 999 10)</b> - finds synapse-associated genes with <a href="{{ url_for('glossary_blueprint.glossary') }}#E"> - cis eQTL</a> on Chr 4 from 122 and 155 Mb with LRS scores - between 9 and 999.</li> + <a href="http://amigo.geneontology.org/amigo/medial_search?q=GO%3A0045202"> + Gene Ontology</a>.</li> <li><b>RIF=diabetes LRS=(9 999 Chr2 100 105) transLRS=(9 999 10)</b> finds diabetes-associated transcripts with peak <a href="{{ url_for('glossary_blueprint.glossary') }}#E"> @@ -168,99 +193,56 @@ </ul> </section> </div> - <div style="padding-left:120px" class="col-xs-4" style="width: 600px !important;"> - <!-- - <section id="tour-info"> - <div class="page-header"> - <h1>Tour and more info</h1> - </div> - - <h3>Thirty minute tour</h3> - <p> - Take the 30 minute - GeneNetwork <a href="http://www.genenetwork.org/tutorial/WebQTLTour/" class="fs14">tour</a> that includes screen shots and - typical steps in the analysis. - </p> - - <h3>Even more info</h3> - <p> - For information about - resources and methods, select the Info buttons next to the Group - and Database fields above. - </p> - - <p>The <a href="/conditionsofUse.html">conditions</a> - and <a href="/statusandContact.html">contact - </a> pages have information on the status of data sets - and advice on their use and citation.</p> + <div class="col-xs-4" style="width: 600px !important;"> + <section id="affiliates"> + <div class="page-header"> + <h1>Affiliates</h1> + <ul> + <li><b><a href="http://gn1.genenetwork.org">GeneNetwork 1</a> at UTHSC</b></li> + <li><a href="https://systems-genetics.org/">Systems Genetics</a> at EPFL</li> + <li><a href="http://bnw.genenetwork.org/">Bayesian Network Web Server</a> at UTHSC</li> + <li><a href="https://www.geneweaver.org/">GeneWeaver</a></li> + <li><a href="https://phenogen.org/">PhenoGen</a> at University of Colorado</li> + <li><a href="http://www.webgestalt.org/">WebGestalt</a> at Baylor</li> + </ul> + </div> </section> - --> - + <section id="news-section"> + <div class="page-header"> + <h1>News</h1> + <div id="tweets" style="height: 300px; overflow: scroll; overflow-x: hidden;"></div> + <div align="right"> + <a href="https://twitter.com/GeneNetwork2">more news items...</a> + </div> + </div> + </section> <section id="websites"> <div class="page-header"> - <h1>Affiliates and mirrors</h1> + <h1>Github</h1> + <ul> + <li><a href="https://github.com/genenetwork/genenetwork2">GN2 Source Code</a></li> + <li><a href="https://github.com/genenetwork/genenetwork">GN1 Source Code</a></li> + <li><a href="https://github.com/genenetwork/sysmaintenance">System Maintenance Code</a></li> + </ul> </div> - <h3>Websites affiliated with GeneNetwork</h3> - <ul> - <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span> at UTHSC</li> - - <li><a href="http://galaxy.genenetwork.org/">Galaxy</a> at - UTHSC</li> - - <li>GeneNetwork 1 at <span class="broken_link" href="http://ec2.genenetwork.org/">Amazon - Cloud (EC2)</span></li> - - <li>GeneNetwork 1 Source Code at <a href="http://sourceforge.net/projects/genenetwork/">SourceForge</a></li> - - <li>GeneNetwork 2 Source Code at <a href="https://github.com/genenetwork/genenetwork2">GitHub</a></li> - </ul> - <h3>GN1 Mirror and development sites</h3> - - <ul> - <li><a href="http://gn1.genenetwork.org/">Main GN1 site at UTHSC</a> (main site)</li> - <li><span class="broken_link" href="http://genenetwork.helmholtz-hzi.de/">Germany at the HZI</span></li> - <li><a href="http://genenetwork.org/">Memphis at the U of M</a></li> - </ul> - </section> - - <!--<section id="getting-started"> + </section> + <section id="websites"> <div class="page-header"> - <h1>Getting started</h1> + <h1>Links</h1> </div> - - <ol style="font-size:12px;font-family:verdana;color:black"> - <li>Select <b>Species</b> (or All)</li> - - <li>Select <b>Group</b> (a specific sample)</li> - - <li>Select <b>Type</b> of data: - + <h3>GeneNetwork v2:</h3> + <ul> + <li><a href="http://genenetwork.org/">Main website</a> at UTHSC</li> + </ul> + <h3>GeneNetwork v1:</h3> <ul> - <li>Phenotype (traits)</li> - - <li>Genotype (markers)</li> - - <li>Expression (mRNAs)</li> + <li><a href="http://gn1.genenetwork.org/">Main website</a> at UTHSC</li> + <li><span class="broken_link" href="http://artemis.uthsc.edu/">Time Machine</span>: Full GN versions from 2009 to 2016 (mm9)</li> + Cloud (EC2)</a></li> </ul> - </li> - - <li>Select a <b>Dataset</b></li> - - <li>Enter terms in the search field: words, - genes, ID numbers, probes, advanced search commands</li> - - <li>Click the <b>Search</b> button</li> - - <li>Optional: Use the <b>Make Default</b> button to save your preferences</li> - </ol> - - <h3>User Guide</h3> - <h5>Read the - <a href="http://www.genenetwork.org/index4.html"> - user guide</a>.</h5> - - </section>--> + <script type="text/javascript" src="//rf.revolvermaps.com/0/0/8.js?i=526mdlpknyd&m=0&c=ff0000&cr1=ffffff&f=arial&l=33" async="async"></script> + </section> </div> </div> </div> @@ -268,11 +250,7 @@ {%endblock%} {% block js %} - - <script> - gn_server_url = "{{ gn_server_url }}"; - </script> - <script src="/static/new/javascript/dataset_select_menu.js"></script> + <script src="/static/new/javascript/dataset_select_menu_orig.js"></script> <script> function pressed(e) { @@ -288,4 +266,21 @@ } </script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='javascript-twitter-post-fetcher/js/twitterFetcher_min.js') }}"></script> + + <script type="text/javascript"> + var configProfile = { + "profile": {"screenName": 'GeneNetwork2'}, + "domId": 'tweets', + "maxTweets": 5, + "enableLinks": true, + "showUser": false, + "showTime": true, + "showImages": true, + "lang": 'en' + }; + twitterFetcher.fetch(configProfile); + </script> + + {% endblock %} diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html deleted file mode 100755 index 7f82b35c..00000000 --- a/wqflask/wqflask/templates/index_page_orig.html +++ /dev/null @@ -1,349 +0,0 @@ -{% extends "base.html" %} -{% block title %}GeneNetwork{% endblock %} -{% block css %} -<style TYPE="text/css"> - p.interact { display: none; } -</style> -{% endblock %} -{% block content %} -<!-- Start of body --> - -<!-- - <header class="jumbotron subhead" id="overview"> - <div class="container"> - <h1>GeneNetwork</h1> - <p class="lead">Open source bioinformatics for systems genetics</p> - </div> - </header> ---> - - <div class="container-fluid" style="min-width: 1210px;"> - - {{ flash_me() }} - - <div class="row" style="width: 100%;"> - - <div class="col-xs-4" style="margin-right:50px; min-width: 530px; max-width: 550px;"> - <section id="search"> - <div> - <h1>Select and search</h1> - </div> - <form method="get" action="/search" target="_blank" name="SEARCHFORM"> - <fieldset> - <div style="padding: 20px" class="form-horizontal"> - - <div class="form-group"> - <label for="species" class="col-xs-1 control-label" style="width: 65px !important;">Species:</label> - <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> - <div class="col-8"> - <select name="species" id="species" class="form-control" style="width: 280px !important;"><option>Loading...</option></select> - </div> - </div> - </div> - - <div class="form-group"> - <label for="group" class="col-xs-1 control-label" style="width: 65px !important;">Group:</label> - <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> - <div class="col-9"> - <select name="group" id="group" class="form-control" style="width: 340px !important;"><option>Loading...</option></select> - <i class="icon-question-sign"></i> - </div> - <div class="col-3" style="margin-left: 10px;"> - <button type="button" id="group_info" class="btn btn-primary form-control" style="width: 50px !important;">Info</button> - </div> - </div> - </div> - - <div class="form-group"> - <label for="tissue" class="col-xs-1 control-label" style="width: 65px !important;">Type:</label> - <div class="col-xs-10 controls input-append" style="display: flex; padding-left: 20px;"> - <div class="col-9"> - <select name="type" id="type" class="form-control" style="width: 340px !important;"><option>Loading...</option></select> - </div> - <div class="col-3" style="margin-left: 10px;"> - <button type="button" id="dataset_info" class="btn btn-primary form-control" style="width: 50px !important;">Info</button> - </div> - </div> - </div> - - <div class="form-group"> - <label for="dataset" class="col-xs-1 control-label" style="width: 65px !important;">Dataset:</label> - <div class="col-xs-10 controls" style="display: flex; padding-left: 20px;"> - <div class="col-9"> - <select name="dataset" id="dataset" class="form-control" style="max-width: 550px; width: 450px !important;"><option>Loading...</option></select> - <i class="icon-question-sign"></i> - </div> - </div> - </div> - - <!-- USER HELP --> - <!--<p >Databases marked with <b>**</b>--> - <!-- suffix are not public yet.<br>--> - <!-- Access requires <a href="/account.html" target=--> - <!-- "_blank" class="fs14">user login</a>.</p>--> - <!-- GET ANY SEARCH --> - - <div class="form-group"> - <label for="or_search" class="col-xs-1 control-label" style="padding-left: 0px; padding-right: 0px; width: 65px !important;">Get Any:</label> - <div class="col-xs-10 controls" style="padding-left: 20px;"> - <div class="col-8"> - <textarea onkeydown="pressed(event)" name="search_terms_or" rows="1" class="form-control search-query" style="resize: vertical; max-width: 550px; width: 450px !important;" id="or_search"></textarea> - </div> - </div> - </div> - - <!-- GET ANY HELP --> - <div class="form-group"> - <label for="btsearch" class="col-xs-1 control-label" style="width: 65px !important;"></label> - <div class="col-xs-10 controls" style="padding-left: 20px;"> - <div class="col-12 controls"> - Enter terms, genes, ID numbers in the <b>Search</b> field.<br> - Use <b>*</b> or <b>?</b> wildcards (Cyp*a?, synap*).<br> - Use <b>quotes</b> for terms such as <i>"tyrosine kinase"</i>. - </div> - </div> - </div> - - <div class="form-group"> - <label for="and_search" class="col-xs-1 control-label" style="padding-left: 0px; padding-right: 0px; width: 65px !important;">Combined:</label> - <div class="col-xs-10 controls" style="padding-left: 20px;"> - <div class="col-8"> - <textarea onkeydown="pressed(event)" name="search_terms_and" rows="1" class="form-control search-query" style="resize: vertical; max-width: 550px; width: 450px !important;" id="and_search"></textarea> - </div> - </div> - </div> - - <div class="form-group"> - <label for="btsearch" class="col-xs-1 control-label" style="width: 65px !important;"></label> - <div class="col-xs-10 controls" style="display: flex; padding-left: 20px;"> - <div class="col-2 controls"> - <input id="btsearch" type="submit" class="btn btn-primary form-control" value="Search"> - </div> - <div class="col-2 controls" style="padding-left: 20px;"> - <button type="button" id="make_default" class="btn btn-primary form-control">Make Default</button> - </div> - </div> - </div> - - <!-- SEARCH, MAKE DEFAULT --> - - <div class="form-group"> - </div> - - <input type="hidden" name="FormID" value="searchResult" class="form-control"> - <!--!<input type="hidden" name="RISet" value="BXD">--> - </div> - </fieldset> - </form> - </section> - <section id="advanced"> - <div class="page-header"> - <h2>Advanced commands</h2> - </div> - - <p>You can also use advanced commands. Copy these simple examples into the Get Any field for single term searches and Combined for searches with multiple terms:</p> - - <ul> - <li><b>POSITION=(chr1 25 30)</b> finds genes, markers, or transcripts on - chromosome 1 between 25 and 30 Mb.</li> - - <li><b>MEAN=(15 16)</b> in the <b>Combined</b> field finds - highly expressed genes (15 to 16 log2 units)</li> - - <li><b>RANGE=(1.5 2.5)</b> in the <b>Any</b> field finds traits with values with a specified fold-range (minimum = 1). - Useful for finding "housekeeping genes" <b>(1.0 1.2)</b> or highly variable molecular assays <b>(10 100)</b>.</li> - - <li><b>LRS=(15 1000)</b> or <b>LOD=(2 8)</b> finds all traits with peak LRS or LOD scores between lower and upper limits.</li> - - <li><b>LRS=(9 999 Chr4 122 155)</b> finds all traits on Chr 4 from 122 and 155 Mb with LRS scores between 9 and 999.</li> - - <li><b>cisLRS=(15 1000 5)</b> or <b>cisLOD=(2 8 5)</b> finds all cis eQTLs with peak LRS or LOD scores between lower and upper limits, - with an <b>inclusion</b> zone of 5 Mb around the parent gene.</li> - - <li><b>transLRS=(15 1000 5)</b> or <b>transLOD=(2 8 5)</b> finds all trans eQTLs with peak LRS or LOD scores between lower and upper limits, - with an <b>exclusion</b> zone of 5 Mb around the parent gene. You can also add a fourth term specifying which chromosome you want the transLRS to be on - (for example transLRS=(15 1000 5 7) would find all trans eQTLs with peak LRS on chromosome 7 that is also a trans eQTL with exclusionary zone of 5Mb).</li> - - <li><b>POSITION=(Chr4 122 130) cisLRS=(9 999 10)</b> - finds all traits on Chr 4 from 122 and 155 Mb with cisLRS scores - between 9 and 999 and an inclusion zone of 10 Mb.</li> - - <li><b>RIF=mitochondrial</b> searches RNA databases for <a href="https://en.wikipedia.org/wiki/GeneRIF"> - GeneRIF</a> links.</li> - - <li><b>WIKI=nicotine</b> searches <a href="http://gn1.genenetwork.org/webqtl/main.py?FormID=geneWiki"> - GeneWiki</a> for genes that you or other users have annotated - with the word <i>nicotine</i>.</li> - - <li><b>GO:0045202</b> searches for synapse-associated genes listed in the - <a href="http://amigo.geneontology.org/amigo/medial_search?q=GO%3A0045202"> - Gene Ontology</a>.</li> - - <li><b>RIF=diabetes LRS=(9 999 Chr2 100 105) transLRS=(9 999 10)</b> - finds diabetes-associated transcripts with peak <a href="{{ url_for('glossary_blueprint.glossary') }}#E"> - trans eQTLs</a> on Chr 2 between 100 and 105 Mb with LRS - scores between 9 and 999.</li> - </ul> - </section> - </div> - - <div class="col-xs-4" style="width: 600px !important;"> - <section id="affiliates"> - <div class="page-header"> - <h1>Affiliates</h1> - <ul> - <li><b><a href="http://gn1.genenetwork.org">GeneNetwork 1</a> at UTHSC</b></li> - <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span> at UTHSC</li> - <li><a href="https://systems-genetics.org/">Systems Genetics</a> at EPFL</li> - <li><a href="http://bnw.genenetwork.org/">Bayesian Network Web Server</a> at UTHSC</li> - <li><a href="https://www.geneweaver.org/">GeneWeaver</a></li> - <li><a href="https://phenogen.org/">PhenoGen</a> at University of Colorado</li> - <li><a href="http://www.webgestalt.org/">WebGestalt</a> at Baylor</li> - </ul> - </div> - </section> - <section id="news-section"> - <div class="page-header"> - <h1>News</h1> - <div id="tweets" style="height: 300px; overflow: scroll; overflow-x: hidden;"></div> - <div align="right"> - <a href="https://twitter.com/GeneNetwork2">more news items...</a> - </div> - </div> - </section> - <section id="websites"> - <div class="page-header"> - <h1>Github</h1> - <ul> - <li><a href="https://github.com/genenetwork/genenetwork2">GN2 Source Code</a></li> - <li><a href="https://github.com/genenetwork/genenetwork">GN1 Source Code</a></li> - <!--<li><a href="https://github.com/genenetwork/gn-docs/wiki">GN2 Document Wiki</a></li>--> - <li><a href="https://github.com/genenetwork/sysmaintenance">System Maintenance Code</a></li> - </ul> - </div> - </section> - <!-- - <section id="tour-info"> - <div class="page-header"> - <h1>Tour and more info</h1> - </div> - - <h3>Thirty minute tour</h3> - <p> - Take the 30 minute - GeneNetwork <a href="http://www.genenetwork.org/tutorial/WebQTLTour/" class="fs14">tour</a> that includes screen shots and - typical steps in the analysis. - </p> - - <h3>Even more info</h3> - <p> - For information about - resources and methods, select the Info buttons next to the Group - and Database fields above. - </p> - - <p>The <a href="/conditionsofUse.html">conditions</a> - and <a href="/statusandContact.html">contact - </a> pages have information on the status of data sets - and advice on their use and citation.</p> - - </section> - - </section> - --> - <section id="websites"> - <div class="page-header"> - <h1>Links</h1> - </div> - <h3>GeneNetwork v2:</h3> - <ul> - <li><a href="http://genenetwork.org/">Main website</a> at UTHSC</li> - <!--<li><a href="http://test-genenetwork.org/">Testing website</a> at UTHSC</li>--> - </ul> - <h3>GeneNetwork v1:</h3> - <ul> - <li><a href="http://gn1.genenetwork.org/">Main website</a> at UTHSC</li> - <li><span class="broken_link" href="http://artemis.uthsc.edu/">Time Machine</span>: Full GN versions from 2009 to 2016 (mm9)</li> - Cloud (EC2)</a></li> - </ul> - <script type="text/javascript" src="//rf.revolvermaps.com/0/0/8.js?i=526mdlpknyd&m=0&c=ff0000&cr1=ffffff&f=arial&l=33" async="async"></script> - </section> - - <!--<section id="getting-started"> - <div class="page-header"> - <h1>Getting started</h1> - </div> - - <ol style="font-size:12px;font-family:verdana;color:black"> - <li>Select <b>Species</b> (or All)</li> - - <li>Select <b>Group</b> (a specific sample)</li> - - <li>Select <b>Type</b> of data: - - <ul> - <li>Phenotype (traits)</li> - - <li>Genotype (markers)</li> - - <li>Expression (mRNAs)</li> - </ul> - </li> - - <li>Select a <b>Dataset</b></li> - - <li>Enter terms in the search field: words, - genes, probes, advanced search commands</li> - - <li>Click the <b>Search</b> button</li> - - <li>Optional: Use the <b>Make Default</b> button to save your preferences</li> - </ol> - - <h3>User Guide</h3> - <h5>Read the - <a href="http://gn1.genenetwork.org/index4.html"> - user guide</a>.</h5> - - </section>--> - </div> - </div> - </div> - -{%endblock%} - -{% block js %} - <script src="/static/new/javascript/dataset_select_menu_orig.js"></script> - - <script> - function pressed(e) { - // Has the enter key been pressed? - if ( (window.event ? event.keyCode : e.which) == 13) { - e.preventDefault(); - // If enter key has been pressed and the search fields are non-empty - // manually submit the <form> - if( event.target.value.trim() != "" ) { - document.forms[1].submit(); - } - } - } - </script> - - <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='javascript-twitter-post-fetcher/js/twitterFetcher_min.js') }}"></script> - - <script type="text/javascript"> - var configProfile = { - "profile": {"screenName": 'GeneNetwork2'}, - "domId": 'tweets', - "maxTweets": 5, - "enableLinks": true, - "showUser": false, - "showTime": true, - "showImages": true, - "lang": 'en' - }; - twitterFetcher.fetch(configProfile); - </script> - - -{% endblock %} diff --git a/wqflask/wqflask/templates/loading.html b/wqflask/wqflask/templates/loading.html index 926f258d..6d6136ac 100644 --- a/wqflask/wqflask/templates/loading.html +++ b/wqflask/wqflask/templates/loading.html @@ -7,34 +7,68 @@ {% endfor %} <div class="container"> <div> - <div style="min-height: 80vh; display: flex; align-items: center; text-align: center;"> - <div style="margin-bottom: 5px; left: 50%; margin-right: -50%; margin-top: 10%; transform: translate(-50%, -50%); position: absolute;"> - {% if start_vars.tool_used == "Mapping" %} - <h1>Computing the Maps</h1> - <br> - <i>n</i> = {{ start_vars.n_samples }} - <br> - Method = {% if start_vars.method == "gemma" %}GEMMA{% else %}{{ start_vars.method }}{% endif %} - <br> - {% if start_vars.transform != "" %} - transform = {{ start_vars.transform }} - <br> - {% endif %} - MAF >= {{ start_vars.maf }} - {% else %} - <h1>Loading {{ start_vars.tool_used }} Results...</h1> - {% endif %} - <br><br> - <img align="center" src="/static/gif/89.gif"> - </div> - <!--<div style="margin-left: auto; margin-right: auto; display: block; width: 50%; top:50%;"> + <div style="min-height: 80vh; display: flex; align-items: center; text-align: left;"> + <div style="margin-bottom: 5px; left: 50%; margin-right: -50%; top: 50%; transform: translate(-50%, -50%); position: absolute;"> + {% if start_vars.tool_used == "Mapping" %} + <h1>Computing the Maps</h1> + <br> + <b>Trait Metadata</b> + <br> + species = <b><i>{{ start_vars.species[0] | upper }}{{ start_vars.species[1:] }}</i></b> + <br> + group = <b><i>{{ start_vars.group[0] | upper }}{{ start_vars.group[1:] }}</i></b> + <br> + trait identifier = <b><i>{{ start_vars.trait_id }}</i></b> + <br> + n of sample = <b><i>{{ start_vars.n_samples }}</i></b> + {% if start_vars.transform != "" %} + <br> + transformation = <b><i>{{ start_vars.transform }}</i></b> + {% endif %} + <br><br> + <b>Mapping Metadata</b> + <br> + mapping method = <b><i>{% if start_vars.method == "gemma" %}GEMMA {% if start_vars.use_loco == "True" %}using LOCO {% endif %}{% else %}{{ start_vars.method }}{% endif %}</i></b> + {% if start_vars.maf != "" and start_vars.method != "reaper" %} + <br> + minor allele frequency lower limit = <b><i>{{ start_vars.maf }}</i></b> + {% endif %} + <br> + {% if start_vars.covariates != "" and start_vars.method != "reaper" %} + {% set covariate_list = start_vars.covariates.split(",") %} + cofactors = <b><i>{% for covariate in covariate_list %}{% set this_covariate = covariate.split(":")[0] %}{{ this_covariate }}{% if not loop.last %}, {% endif %}{% endfor %}</i></b> + {% else %} + cofactors = <b><i>None</i></b> + {% endif %} + {% if start_vars.control_marker != "" and start_vars.do_control == "true" and start_vars.method != "gemma" %} + <br> + marker covariate = <b><i>{{ start_vars.control_marker }}</i></b> + {% endif %} + <br> + {% if start_vars.genofile != "" %} + {% set genofile_desc = start_vars.genofile.split(":")[1] %} + genotype file = <b><i>{{ genofile_desc }}</i></b> + {% else %} + genotype file = <b><i>{{ start_vars.group[0] | upper }}{{ start_vars.group[1:] }}.geno</i></b> + {% endif %} + {% if start_vars.num_perm | int > 0 and start_vars.method != "gemma" %} + <br> + n of permutations = <b><i>{{ start_vars.num_perm }}</i></b> + {% endif %} + {% if num_bootstrap in start_vars %} + {% if start_vars.num_bootstrap | int > 0 and start_vars.method == "reaper" %} + <br> + n of bootstrap = <b><i>{{ start_vars.num_bootstrap }}</i></b> + {% endif %} + {% endif %} + {% else %} + <h1>Loading {{ start_vars.tool_used }} Results...</h1> + {% endif %} + <br><br> + <div style="text-align: center;"> <img align="center" src="/static/gif/89.gif"> - </div>--> - <!-- - <div class="progress center-block" style="margin-left: 25%; margin-right: 25%; position: absolute; height:50px; width:50%; top:50%;"> - <div class="progress-bar progress-bar-striped active" role="progressbar" aria-valuenow="100" aria-valuemin="0" aria-valuemax="100" style="width:100%;"></div> + </div> </div> - --> </div> </div> </div> diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html index c08b21ef..d6fc6e37 100644 --- a/wqflask/wqflask/templates/mapping_results.html +++ b/wqflask/wqflask/templates/mapping_results.html @@ -17,8 +17,9 @@ <input type="hidden" name="temp_uuid" value="{{ temp_uuid }}"> {% if temp_trait is defined %} <input type="hidden" name="temp_trait" value="{{ temp_trait }}"> - <input type="hidden" name="group" value="{{ group }}"> {% endif %} + <input type="hidden" name="group" value="{{ dataset.group.name }}"> + <input type="hidden" name="species" value="{{ dataset.group.species }}"> <input type="hidden" name="trait_id" value="{{ this_trait.name }}"> <input type="hidden" name="dataset" value="{{ dataset.name }}"> <input type="hidden" name="genofile" value="{{ genofile_string }}"> @@ -71,7 +72,7 @@ {% endif %} <br> <br> - <a id="export_mapping_results" href="#" target="_blank" >Download Full Results</a> + <a class="export_mapping_results" href="#" target="_blank" >Download Full Results</a> </div> <div id="gn1_map_options" class="col-xs-5" style="outline: 3px double #AAAAAA; padding: 10px; margin: 10px;"> <div class="col-xs-8" style="padding: 0px;"> @@ -247,6 +248,7 @@ <button class="btn btn-default" id="deselect_all"><span class="glyphicon glyphicon-remove"></span> Deselect All</button> <button class="btn btn-default" id="invert"><span class="glyphicon glyphicon-resize-vertical"></span> Invert</button> {% if geno_db_exists == "True" %}<button class="btn btn-success" id="add" disabled><span class="glyphicon glyphicon-plus-sign"></span> Add</button>{% endif %} + <button class="btn btn-default export_mapping_results" >Download <span class="glyphicon glyphicon-download"></span></button> <br /> <br /> <div id="table_container" style="width:{% if 'additive' in trimmed_markers[0] %}600{% else %}550{% endif %}px;"> @@ -355,7 +357,7 @@ {% endif %} <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> - <script language="javascript" type="text/javascript" src="https://cdn.datatables.net/buttons/1.0.0/js/dataTables.buttons.min.js"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/scientific.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='purescript-genome-browser/js/purescript-genetics-browser.js') }}"></script> @@ -480,7 +482,7 @@ var mapping_input_list = ['temp_uuid', 'trait_id', 'dataset', 'tool_used', 'form_url', 'method', 'transform', 'trimmed_markers', 'selected_chr', 'chromosomes', 'mapping_scale', 'sample_vals', 'score_type', 'suggestive', 'significant', 'num_perm', 'permCheck', 'perm_output', 'perm_strata', 'categorical_vars', 'num_bootstrap', 'bootCheck', 'bootstrap_results', - 'LRSCheck', 'covariates', 'maf', 'use_loco', 'manhattan_plot', 'color_scheme', 'manhattan_single_color', 'control_marker', 'control_marker_db', 'do_control', 'genofile', + 'LRSCheck', 'covariates', 'maf', 'use_loco', 'manhattan_plot', 'color_scheme', 'manhattan_single_color', 'control_marker', 'do_control', 'genofile', 'pair_scan', 'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck', 'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck', 'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno', 'temp_trait', 'group', 'species', 'reaper_version', 'primary_samples', 'n_samples'] @@ -612,7 +614,7 @@ return $('#marker_regression_form').submit(); } - $('#export_mapping_results').click(export_mapping_results); + $('.export_mapping_results').click(export_mapping_results); $('#browser_tab').click(function() { $('#gn1_map_options').css("display", "none") diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html index e7a7bc51..7ec335d5 100644 --- a/wqflask/wqflask/templates/search_result_page.html +++ b/wqflask/wqflask/templates/search_result_page.html @@ -4,7 +4,7 @@ <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='DataTables/css/jquery.dataTables.css') }}" /> <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='fontawesome/css/font-awesome.min.css') }}" /> <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css') }}"> - <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css"> + <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='fontawesome/css/all.min.css') }}"/> <link rel="stylesheet" type="text/css" href="/static/new/css/show_trait.css" /> <link rel="stylesheet" type="text/css" href="static/new/css/trait_list.css" /> {% endblock %} @@ -150,7 +150,7 @@ <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/buttons.colVis.min.js') }}"></script> - <script language="javascript" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/js/all.min.js"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='fontawesome/js/all.min.js') }}"></script> <script language="javascript" type="text/javascript" src="/static/new/javascript/search_results.js"></script> @@ -188,7 +188,7 @@ }); }, 'createdRow': function ( row, data, index ) { - $('td', row).eq(0).attr("style", "text-align: center; padding: 0px 10px 2px 10px;"); + $('td', row).eq(0).attr("style", "text-align: center; padding: 0px 10px 2px 13px;"); $('td', row).eq(1).attr("align", "right"); $('td', row).eq(1).attr('data-export', index+1); $('td', row).eq(2).attr('data-export', $('td', row).eq(2).text()); @@ -227,7 +227,7 @@ 'columns': [ { 'data': null, - 'width': "25px", + 'width': "10px", 'orderDataType': "dom-checkbox", 'orderable': false, 'render': function(data, type, row, meta) { @@ -420,4 +420,4 @@ }); </script> -{% endblock %}
\ No newline at end of file +{% endblock %} diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 09ecb7b6..3dbf5f57 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -102,7 +102,7 @@ <div id="outlier_alert_placeholder"></div> </div> </div> - <div class="panel panel-default"> + <div class="panel panel-default" {% if (trait_table_width|int > 1100) %}style="min-width: {{ trait_table_width|int + 30 }}px;"{% endif %}> <div class="panel-heading" data-toggle="collapse" data-parent="#accordion" data-target="#collapseSix" aria-expanded="true"> <h3 class="panel-title"> <span class="glyphicon glyphicon-chevron-up"></span> Review and Edit Data @@ -148,7 +148,7 @@ <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/scientific.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> - <script language="javascript" type="text/javascript" src="https://cdn.datatables.net/scroller/2.0.3/js/dataTables.scroller.min.js"></script> + <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='nouislider/nouislider.js') }}"></script> <script type="text/javascript" src="/static/new/javascript/initialize_show_trait_tables.js"></script> <script type="text/javascript" src="/static/new/javascript/show_trait_mapping_tools.js"></script> diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 50803978..ef784c84 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -117,6 +117,9 @@ <div class="col-xs-3 controls"> <input type="button" class="btn corr_compute submit_special btn-success" data-url="/corr_compute" title="Compute Correlation" value="Compute"> </div> + <div class="col-xs-3 controls"> + <input type="button" class="btn test_corr_compute submit_special btn-success" data-url="/test_corr_compute" title="Compute Correlation" value="Test Compute"> + </div> </div> </div> </div> diff --git a/wqflask/wqflask/templates/show_trait_details.html b/wqflask/wqflask/templates/show_trait_details.html index d2999eef..83f7b0ac 100644 --- a/wqflask/wqflask/templates/show_trait_details.html +++ b/wqflask/wqflask/templates/show_trait_details.html @@ -235,7 +235,7 @@ {% endif %} <button type="button" id="view_in_gn1" class="btn btn-primary" title="View Trait in GN1" onclick="window.open('http://gn1.genenetwork.org/webqtl/main.py?cmd=show&db={{ this_trait.dataset.name }}&probeset={{ this_trait.name }}', '_blank')">Go to GN1</button> {% if admin_status == "owner" or admin_status == "edit-admins" or admin_status == "edit-access" %} - <button type="button" id="edit_resource" class="btn btn-success" title="Edit Resource" onclick="window.open('./resources/manage?resource_id={{ resource_id }}', '_blank')">Edit</button> + <button type="button" id="edit_resource" class="btn btn-success" title="Edit Resource" onclick="window.open('/trait/{{ this_trait.name }}/edit/{{ this_trait.dataset.id }}', '_blank')">Edit</button> {% endif %} </div> </div> diff --git a/wqflask/wqflask/templates/show_trait_edit_data.html b/wqflask/wqflask/templates/show_trait_edit_data.html index 0d34bebc..5939c953 100644 --- a/wqflask/wqflask/templates/show_trait_edit_data.html +++ b/wqflask/wqflask/templates/show_trait_edit_data.html @@ -53,7 +53,7 @@ </div> </div> {% set outer_loop = loop %} - <div class="sample_group" style="width:{{ trait_table_width }};"> + <div class="sample_group" style="width:{{ trait_table_width }}px;"> <div style="position: relative;"> <div class="inline-div"><h3 style="float: left;">{{ sample_type.header }}<span name="transform_text"></span></h3></div> </div> diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index c3575454..3dd44c85 100755 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -39,7 +39,7 @@ {% if genofiles and genofiles|length>0 %} <div class="mapping_method_fields form-group"> <label for="genofiles" class="col-xs-3 control-label">Genotypes</label> - <div class="col-xs-8 controls"> + <div class="col-xs-6 controls"> <select id="genofile_gemma" class="form-control"> {% for item in genofiles %} <option value="{{item['location']}}:{{item['title']}}">{{item['title']}}</option> @@ -124,7 +124,7 @@ </div> <div class="mapping_method_fields form-group"> <label for="genofiles" class="col-xs-3 control-label">Genotypes</label> - <div class="col-xs-4 controls"> + <div class="col-xs-6 controls"> <select id="genofile_reaper" class="form-control"> {% for item in genofiles %} <option value="{{item['location']}}:{{item['title']}}">{{item['title']}}</option> @@ -218,7 +218,7 @@ </div> <div class="mapping_method_fields form-group"> <label for="genofiles" class="col-xs-3 control-label">Genotypes</label> - <div class="col-xs-4 controls"> + <div class="col-xs-6 controls"> <select id="genofile_rqtl_geno" class="form-control"> {% for item in genofiles %} <option value="{{item['location']}}:{{item['title']}}">{{item['title']}}</option> @@ -364,4 +364,4 @@ {% else %} Mapping options are disabled for data not matched with genotypes. {% endif %} -</div>
\ No newline at end of file +</div> diff --git a/wqflask/wqflask/templates/show_trait_transform_and_filter.html b/wqflask/wqflask/templates/show_trait_transform_and_filter.html index b70ca590..e3f5ef81 100644 --- a/wqflask/wqflask/templates/show_trait_transform_and_filter.html +++ b/wqflask/wqflask/templates/show_trait_transform_and_filter.html @@ -20,7 +20,7 @@ <div id="remove_samples_invalid" class="alert alert-error" style="display:none;"> Please check that your input is formatted correctly, e.g. <strong>3, 5-10, 12</strong> </div> - {% if sample_groups[0].attributes %} + {% if categorical_attr_exists == "true" %} <div class="input-append block-div-2"> <label for="exclude_column">Block samples by group:</label> <select id="exclude_column" size=1> diff --git a/wqflask/wqflask/templates/submit_trait.html b/wqflask/wqflask/templates/submit_trait.html index 68b06f55..3572b0a9 100644 --- a/wqflask/wqflask/templates/submit_trait.html +++ b/wqflask/wqflask/templates/submit_trait.html @@ -14,7 +14,7 @@ <h2 style="color: #5a5a5a;">Introduction</h2> <hr> <p>The trait values that you enter are statistically compared with verified genotypes collected at a set of microsatellite markers in each RI set. The markers are drawn from a set of over 750, but for each set redundant markers have been removed, preferentially retaining those that are most informative.</p> - <p>These error-checked RI mapping data match theoretical expectations for RI strain sets. The cumulative adjusted length of the RI maps are approximately 1400 cM, a value that matches those of both MIT maps and Chromosome Committee Report maps. See our full description of the genetic data collected as part of the WebQTL project.</p> + <p>These error-checked RI mapping data match theoretical expectations for RI strain sets. The cumulative adjusted length of the RI maps are approximately 1400 cM, a value that matches those of both MIT maps and Chromosome Committee Report maps. See our <a target="_blank" href="http://www.nervenet.org/papers/BXN.html">full description</a> of the genetic data collected as part of the WebQTL project.</p> </div> </section> <br> @@ -53,7 +53,7 @@ </div> </div> </div> - <div style="padding-bottom: 50px;" class="form-horizontal"> + <div style="padding-bottom: 50px; margin-bottom:400px" class="form-horizontal"> <h3>2. Enter Trait Data:</h3> <h4 style="color:red;">File uploading isn't enabled yet, but is coming soon.</h4> <br> @@ -61,18 +61,6 @@ <img src="/static/new/images/step2.gif"> </div> <div class="col-xs-10"> - <!-- - <div class="form-group" style="padding-left: 15px;"> - <p> - <b>From a File:</b> You can enter data by entering a file name here. The file should contain a series of numbers representing trait values. - The values can be on one line separated by spaces or tabs, or they can be on separate lines. Include one value for each progeny individual - or recombinant inbred line. Represent missing values with a non-numeric character such as "x". If you have chosen a recombinant inbred set, - when you submit your data will be displayed in a form where you can confirm and/or edit them. If you enter a file name here, any data that - you paste into the next section will be ignored. - </p> - <input type="file" name="trait_file" style="border-width: 1px; border-style: solid; border-color: #999999;"> - </div> - --> <div class="form-group" style="padding-left: 15px;"> <p> <b>Paste or Type Multiple Values:</b> You can enter data by pasting a series of numbers representing trait values into this area. @@ -89,6 +77,24 @@ <input type="reset" style="width: 110px;" class="btn btn-primary form-control col-xs-2" value="Reset"> </div> </div> + <div style="padding-bottom: 50px;" class="form-horizontal"> + <h3>3. Enable Use of Trait Variance:</h3> + <div class="col-xs-2" style=""display: flex; align-items: center;"> + <img src="/static/new/images/step3.gif"> + </div> + <div class="col-xs-10"> + <div class="form-group" style="padding-left: 15px;"> + <p> + <b>Name Your Trait:</b> <span style="color:red;">(optional)</span> + </p> + <textarea name="trait_name" rows="1" cols="30"></textarea> + </div> + </div> + <div class="controls" style="display:block; margin-left: 40%; margin-right: 20%;"> + <input type="submit" style="width: 110px; margin-right: 25px;" class="btn btn-primary form-control col-xs-2" value="Submit Trait"> + <input type="reset" style="width: 110px;" class="btn btn-primary form-control col-xs-2" value="Reset"> + </div> + </div> </section> </div> </div> diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html new file mode 100644 index 00000000..0809b65e --- /dev/null +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -0,0 +1,159 @@ +{% extends "base.html" %} +{% block title %}Correlation Results{% endblock %} +{% block css %} + <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='DataTables/css/jquery.dataTables.css') }}" /> + <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css') }}" /> + <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css') }}"> + <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css"> + <link rel="stylesheet" type="text/css" href="/static/new/css/trait_list.css" /> + <link rel="stylesheet" type="text/css" href="/static/new/css/show_trait.css" /> + + <style type="text/css"> + .td-styles{ + height: 40px; + text-align: center; + } + .trait_col { + font-weight:bolder; + text-align: center; + color:#036ffc; + /*font-size: 1.1em;*/ + } + table th { + font-weight: bolder; + text-transform: uppercase; + } + .correlation-title { + padding:25px 10px; + } + .correlation-title h3 span { + font-weight: bolder; + } + .header-toggle-vis { + padding:10px 5px; + } + .header-toggle-vis button { + border-radius: 5px; + + } + </style> +{% endblock %} + +{% block content %} + +<div class="correlation-title"> + <h3>Correlation Results for <span>{{target_dataset}}</span> against <span><a href="">{{this_trait}}</a></span> for the top <span>{{return_results}}</span> Results</h3> +</div> +<div class="header-toggle-vis"> + <h4 style="font-weight: bolder;padding: 5px 3px;">Toggle Columns</h4> + <button class="toggle-vis" data-column="1">Index</button> + <button class="toggle-vis" data-column="2">Trait Name</button> + <button class="toggle-vis" data-column="3">Sample r</button> + <button class="toggle-vis" data-column="4">Sample P(r)</button> + <button class="toggle-vis" data-column="5">Num overlap</button> +</div> + <table id="example" class="display" width="100%"> + <thead> + <tr > + <th></th> + <th>index</th> + <th>trait_name</th> + <th>Sample r</th> + <th>Sample r(p)</th> + <th>N</th> + <th>Tissue r</th> + <th>Tissue r(p)</th> + <th>Lit r</th> + </tr> + </thead> + </table> + +{% endblock %} + +{% block js %} +<script type="text/javascript" src="{{ url_for('js', filename='js_alt/md5.min.js') }}"></script> +<script type="text/javascript" src="/static/new/javascript/search_results.js"></script> + +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/underscore.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='jszip/jszip.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/underscore.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/buttons.html5.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> +<script language="javascript" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/js/all.min.js"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> +<script type="text/javascript"> + let correlationResults = {{correlation_results|safe}} + // document.querySelector(".content").innerHTML =correlationResults + // parse the data + let counter = 0; + let corr_type = "tissue"; + correlationResults =correlationResults.map((trait_object)=>{ + let trait_name = Object.keys(trait_object)[0] + + let new_dict = { + "index":counter, + "trait_name":trait_name, + ...trait_object[trait_name] + } + counter++; + return new_dict; + }) + +console.log(correlationResults) + +</script> + +<script type="text/javascript"> + $(document).ready(function() { + let table = $('#example').DataTable( { + "data": correlationResults, + "columns": [ + {"data":corr_type=="sample"?null:"fd","width":"25px"}, + { "data": "index","width":"120px","title":"Index" }, + { "data": "trait_name","title":"TraitName"}, + { "data": "corr_coeffient","defaultContent": "--"}, + { "data": "p_value","defaultContent":"--"}, + { "data": "num_overlap","defaultContent":"--"}, + {"data":"tissue_corr","defaultContent":"--","title":"Tissue r"}, + {"data":"tissue_p_val","defaultContent":"--","title":"Tissue r(p)"}, + {"data":"lit_corr","defaultContent":"--","title":"Lit rho"} + ], + "columnDefs": [ + { + targets:0, + data:null, + defaultContent: '', + orderable: false, + className: 'select-checkbox', + "render":(data,type,row)=>{ + return `<input type="checkbox" class="checkbox trait_checkbox" value="other">` + } + + }, + {className:"trait_col",targets:2}, + {className: "td-styles", targets: "_all"}, + { + "targets":2, + "render":(data,type,row)=>{ + // should use a dynamic dataset name + let urlLink = `/show_trait?trait_id=${data}&dataset=HC_M2_0606_P` + let traitLink = `<a href=${urlLink}>${data}</a>` + return traitLink + }, + } + + ] + } ); + + $(":button.toggle-vis").on("click",function(e){ + e.preventDefault() + let column = table.column($(this).attr("data-column")); + column.visible(!column.visible()) + console.log($(this).attr("data-column")) + }) +} ); +</script> + +{% endblock %}
\ No newline at end of file diff --git a/wqflask/wqflask/templates/tutorials.html b/wqflask/wqflask/templates/tutorials.html index 04eddfa4..18f8d675 100644 --- a/wqflask/wqflask/templates/tutorials.html +++ b/wqflask/wqflask/templates/tutorials.html @@ -562,6 +562,5 @@ $('#myTable').DataTable(); </body> - {% endblock %} diff --git a/wqflask/wqflask/update_search_results.py b/wqflask/wqflask/update_search_results.py index 672f95b1..2e467dc8 100644 --- a/wqflask/wqflask/update_search_results.py +++ b/wqflask/wqflask/update_search_results.py @@ -10,7 +10,8 @@ from utility.benchmark import Bench from utility.logger import getLogger logger = getLogger(__name__) -class GSearch(object): + +class GSearch: def __init__(self, kw): self.type = kw['type'] @@ -51,10 +52,12 @@ class GSearch(object): self.trait_list = [] with Bench("Creating trait objects"): for line in re: - dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) + dataset = create_dataset( + line[3], "ProbeSet", get_samplelist=False) trait_id = line[4] - #with Bench("Building trait object"): - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + # with Bench("Building trait object"): + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) elif self.type == "phenotype": @@ -96,7 +99,8 @@ class GSearch(object): for line in re: dataset = create_dataset(line[2], "Publish") trait_id = line[3] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + this_trait = GeneralTrait( + dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) self.trait_list.append(this_trait) self.results = self.convert_to_json() @@ -108,8 +112,8 @@ class GSearch(object): json_dict['data'] = [] for i, trait in enumerate(self.trait_list): - trait_row = { "checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), - "index": i+1, + trait_row = {"checkbox": "<INPUT TYPE=\"checkbox\" NAME=\"searchResult\" class=\"checkbox trait_checkbox\" style=\"transform: scale(1.5);\" VALUE=\"{}:{}\">".format(trait.name, trait.dataset.name), + "index": i + 1, "species": trait.dataset.group.species, "group": trait.dataset.group.name, "tissue": trait.dataset.tissue, diff --git a/wqflask/wqflask/user_login.py b/wqflask/wqflask/user_login.py index b6e7973f..ff77982f 100644 --- a/wqflask/wqflask/user_login.py +++ b/wqflask/wqflask/user_login.py @@ -29,13 +29,15 @@ from utility.tools import SMTP_CONNECT, SMTP_USERNAME, SMTP_PASSWORD, LOG_SQL_AL THREE_DAYS = 60 * 60 * 24 * 3 + def timestamp(): return datetime.datetime.utcnow().isoformat() + def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) + return dict(timestamp=timestamp(), + ip_address=request.remote_addr, + user_agent=request.headers.get('User-Agent')) def encode_password(pass_gen_fields, unencrypted_password): @@ -43,10 +45,10 @@ def encode_password(pass_gen_fields, unencrypted_password): salt = pass_gen_fields['salt'] else: salt = bytes(pass_gen_fields['salt'], "utf-8") - encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), + encrypted_password = pbkdf2.pbkdf2_hex(str(unencrypted_password), salt, - pass_gen_fields['iterations'], - pass_gen_fields['keylength'], + pass_gen_fields['iterations'], + pass_gen_fields['keylength'], pass_gen_fields['hashfunc']) pass_gen_fields.pop("unencrypted_password", None) @@ -54,61 +56,65 @@ def encode_password(pass_gen_fields, unencrypted_password): return pass_gen_fields + def set_password(password): pass_gen_fields = { - "unencrypted_password": password, - "algorithm": "pbkdf2", - "hashfunc": "sha256", - "salt": base64.b64encode(os.urandom(32)), - "iterations": 100000, - "keylength": 32, - "created_timestamp": timestamp() + "unencrypted_password": password, + "algorithm": "pbkdf2", + "hashfunc": "sha256", + "salt": base64.b64encode(os.urandom(32)), + "iterations": 100000, + "keylength": 32, + "created_timestamp": timestamp() } assert len(password) >= 6, "Password shouldn't be shorter than 6 characters" - encoded_password = encode_password(pass_gen_fields, pass_gen_fields['unencrypted_password']) + encoded_password = encode_password( + pass_gen_fields, pass_gen_fields['unencrypted_password']) return encoded_password + def get_signed_session_id(user): session_id = str(uuid.uuid4()) session_id_signature = hmac.hmac_creation(session_id) session_id_signed = session_id + ":" + session_id_signature - #ZS: Need to check if this is ever actually used or exists + # ZS: Need to check if this is ever actually used or exists if 'user_id' not in user: user['user_id'] = str(uuid.uuid4()) save_user(user, user['user_id']) if 'github_id' in user: - session = dict(login_time = time.time(), - user_type = "github", - user_id = user['user_id'], - github_id = user['github_id'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="github", + user_id=user['user_id'], + github_id=user['github_id'], + user_name=user['name'], + user_url=user['user_url']) elif 'orcid' in user: - session = dict(login_time = time.time(), - user_type = "orcid", - user_id = user['user_id'], - github_id = user['orcid'], - user_name = user['name'], - user_url = user['user_url']) + session = dict(login_time=time.time(), + user_type="orcid", + user_id=user['user_id'], + github_id=user['orcid'], + user_name=user['name'], + user_url=user['user_url']) else: - session = dict(login_time = time.time(), - user_type = "gn2", - user_id = user['user_id'], - user_name = user['full_name'], - user_email_address = user['email_address']) + session = dict(login_time=time.time(), + user_type="gn2", + user_id=user['user_id'], + user_name=user['full_name'], + user_email_address=user['email_address']) key = UserSession.user_cookie_name + ":" + session_id Redis.hmset(key, session) Redis.expire(key, THREE_DAYS) - + return session_id_signed + def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not 'UNKNOWN' TLS is used @@ -123,28 +129,31 @@ def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): server.login(SMTP_USERNAME, SMTP_PASSWORD) server.sendmail(fromaddr, toaddr, msg) server.quit() - logger.info("Successfully sent email to "+toaddr) + logger.info("Successfully sent email to " + toaddr) + -def send_verification_email(user_details, template_name = "email/user_verification.txt", key_prefix = "verification_code", subject = "GeneNetwork e-mail verification"): +def send_verification_email(user_details, template_name="email/user_verification.txt", key_prefix="verification_code", subject="GeneNetwork e-mail verification"): verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code - data = json.dumps(dict(id=user_details['user_id'], timestamp = timestamp())) + data = json.dumps(dict(id=user_details['user_id'], timestamp=timestamp())) Redis.set(key, data) Redis.expire(key, THREE_DAYS) recipient = user_details['email_address'] - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} -def send_invitation_email(user_email, temp_password, template_name = "email/user_invitation.txt", subject = "You've been added to a GeneNetwork user group"): + +def send_invitation_email(user_email, temp_password, template_name="email/user_invitation.txt", subject="You've been added to a GeneNetwork user group"): recipient = user_email body = render_template(template_name, temp_password) send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} + @app.route("/manage/verify_email") def verify_email(): if 'code' in request.args: @@ -153,27 +162,32 @@ def verify_email(): # As long as they have access to the email account # We might as well log them in session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie(UserSession.user_cookie_name, + session_id_signed, max_age=None) return response else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") + @app.route("/n/login", methods=('GET', 'POST')) def login(): params = request.form if request.form else request.args logger.debug("in login params are:", params) - if not params: #ZS: If coming to page for first time + if not params: # ZS: If coming to page for first time from utility.tools import GITHUB_AUTH_URL, GITHUB_CLIENT_ID, ORCID_AUTH_URL, ORCID_CLIENT_ID external_login = {} if GITHUB_AUTH_URL and GITHUB_CLIENT_ID != 'UNKNOWN': external_login["github"] = GITHUB_AUTH_URL if ORCID_AUTH_URL and ORCID_CLIENT_ID != 'UNKNOWN': external_login["orcid"] = ORCID_AUTH_URL - return render_template("new_security/login_user.html", external_login = external_login, redis_is_available=is_redis_available()) - else: #ZS: After clicking sign-in + return render_template("new_security/login_user.html", external_login=external_login, redis_is_available=is_redis_available()) + else: # ZS: After clicking sign-in if 'type' in params and 'uid' in params: user_details = get_user_by_unique_column("user_id", params['uid']) if user_details: @@ -186,31 +200,36 @@ def login(): display_id = user_details['orcid'] else: display_id = "" - flash("Thank you for logging in {}.".format(display_id), "alert-success") + flash("Thank you for logging in {}.".format( + display_id), "alert-success") response = make_response(redirect(url_for('index_page'))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) else: flash("Something went unexpectedly wrong.", "alert-danger") - response = make_response(redirect(url_for('index_page'))) + response = make_response(redirect(url_for('index_page'))) return response else: - user_details = get_user_by_unique_column("email_address", params['email_address']) + user_details = get_user_by_unique_column( + "email_address", params['email_address']) password_match = False if user_details: submitted_password = params['password'] pwfields = user_details['password'] if isinstance(pwfields, str): pwfields = json.loads(pwfields) - encrypted_pass_fields = encode_password(pwfields, submitted_password) - password_match = pbkdf2.safe_str_cmp(encrypted_pass_fields['password'], pwfields['password']) + encrypted_pass_fields = encode_password( + pwfields, submitted_password) + password_match = pbkdf2.safe_str_cmp( + encrypted_pass_fields['password'], pwfields['password']) - else: # Invalid e-mail + else: # Invalid e-mail flash("Invalid e-mail address. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response - if password_match: # If password correct - if user_details['confirmed']: # If account confirmed + if password_match: # If password correct + if user_details['confirmed']: # If account confirmed import_col = "false" anon_id = "" if 'import_collections' in params: @@ -218,20 +237,25 @@ def login(): anon_id = params['anon_id'] session_id_signed = get_signed_session_id(user_details) - flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) - response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + flash("Thank you for logging in {}.".format( + user_details['full_name']), "alert-success") + response = make_response(redirect( + url_for('index_page', import_collections=import_col, anon_id=anon_id))) + response.set_cookie( + UserSession.user_cookie_name, session_id_signed, max_age=None) return response else: - email_ob = send_verification_email(user_details, template_name = "email/user_verification.txt") + email_ob = send_verification_email( + user_details, template_name="email/user_verification.txt") return render_template("newsecurity/verification_still_needed.html", subject=email_ob['subject']) - else: # Incorrect password - #ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis + else: # Incorrect password + # ZS: It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis flash("Invalid password. Please try again.", "alert-danger") response = make_response(redirect(url_for('login'))) return response + @app.route("/n/login/github_oauth2", methods=('GET', 'POST')) def github_oauth2(): from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET, GITHUB_AUTH_URL @@ -242,34 +266,39 @@ def github_oauth2(): "code": code } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in result.text.split("&")]} + result = requests.post( + "https://github.com/login/oauth/access_token", json=data) + result_dict = {arr[0]: arr[1] + for arr in [tok.split("=") for tok in result.text.split("&")]} github_user = get_github_user_details(result_dict["access_token"]) user_details = get_user_by_unique_column("github_id", github_user["id"]) if user_details == None: user_details = { - "user_id": str(uuid.uuid4()), - "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", + "user_id": str(uuid.uuid4()), + "name": github_user["name"].encode("utf-8") if github_user["name"] else "None", "github_id": github_user["id"], - "user_url": github_user["html_url"].encode("utf-8"), - "login_type": "github", - "organization": "", - "active": 1, + "user_url": github_user["html_url"].encode("utf-8"), + "login_type": "github", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=github&uid="+user_details["user_id"] + url = "/n/login?type=github&uid=" + user_details["user_id"] return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) + @app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) def orcid_oauth2(): from uuid import uuid4 @@ -279,8 +308,8 @@ def orcid_oauth2(): url = "/n/login" if code: data = { - "client_id": ORCID_CLIENT_ID, - "client_secret": ORCID_CLIENT_SECRET, + "client_id": ORCID_CLIENT_ID, + "client_secret": ORCID_CLIENT_SECRET, "grant_type": "authorization_code", "redirect_uri": GN2_BRANCH_URL + "n/login/orcid_oauth2", "code": code @@ -292,25 +321,27 @@ def orcid_oauth2(): user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) if user_details == None: user_details = { - "user_id": str(uuid4()), - "name": result_dict["name"], - "orcid": result_dict["orcid"], - "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), - "login_type": "orcid", - "organization": "", - "active": 1, + "user_id": str(uuid4()), + "name": result_dict["name"], + "orcid": result_dict["orcid"], + "user_url": "%s/%s" % ("/".join(ORCID_AUTH_URL.split("/")[:-2]), result_dict["orcid"]), + "login_type": "orcid", + "organization": "", + "active": 1, "confirmed": 1 } save_user(user_details, user_details["user_id"]) - url = "/n/login?type=orcid&uid="+user_details["user_id"] + url = "/n/login?type=orcid&uid=" + user_details["user_id"] else: flash("There was an error getting code from ORCID") return redirect(url) + def get_github_user_details(access_token): from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, headers = {'Authorization':'token ' + access_token }).content + result = requests.get(GITHUB_API_URL, headers={ + 'Authorization': 'token ' + access_token}).content return json.loads(result) @@ -325,6 +356,7 @@ def logout(): response.set_cookie(UserSession.user_cookie_name, '', expires=0) return response + @app.route("/n/forgot_password", methods=['GET']) def forgot_password(): """Entry point for forgotten password""" @@ -333,15 +365,16 @@ def forgot_password(): print("ERRORS: ", errors) return render_template("new_security/forgot_password.html", errors=errors) + def send_forgot_password_email(verification_email): from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText - template_name = "email/forgot_password.txt" + template_name = "email/forgot_password.txt" key_prefix = "forgot_password_code" subject = "GeneNetwork password reset" fromaddr = "no-reply@genenetwork.org" - + verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code @@ -353,7 +386,7 @@ def send_forgot_password_email(verification_email): save_verification_code(verification_email, verification_code) - body = render_template(template_name, verification_code = verification_code) + body = render_template(template_name, verification_code=verification_code) msg = MIMEMultipart() msg["To"] = verification_email @@ -365,6 +398,7 @@ def send_forgot_password_email(verification_email): return subject + @app.route("/n/forgot_password_submit", methods=('POST',)) def forgot_password_submit(): """When a forgotten password form is submitted we get here""" @@ -373,19 +407,23 @@ def forgot_password_submit(): next_page = None if email_address != "": logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) + user_details = get_user_by_unique_column( + "email_address", email_address) if user_details: - email_subject = send_forgot_password_email(user_details["email_address"]) + email_subject = send_forgot_password_email( + user_details["email_address"]) return render_template("new_security/forgot_password_step2.html", subject=email_subject) else: - flash("The e-mail entered is not associated with an account.", "alert-danger") + flash("The e-mail entered is not associated with an account.", + "alert-danger") return redirect(url_for("forgot_password")) else: flash("You MUST provide an email", "alert-danger") return redirect(url_for("forgot_password")) + @app.route("/n/password_reset", methods=['GET']) def password_reset(): """Entry point after user clicks link in E-mail""" @@ -400,11 +438,13 @@ def password_reset(): return render_template( "new_security/password_reset.html", user_encode=user_details["email_address"]) else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") + flash( + "Invalid code: Password reset code does not exist or might have expired!", "error") return redirect(url_for("login")) else: return redirect(url_for("login")) + @app.route("/n/password_reset_step2", methods=('POST',)) def password_reset_step2(): """Handle confirmation E-mail for password reset""" @@ -422,44 +462,52 @@ def password_reset_step2(): flash("Password changed successfully. You can now sign in.", "alert-info") return redirect(url_for('login')) + def register_user(params): - thank_you_mode = False - errors = [] - user_details = {} + thank_you_mode = False + errors = [] + user_details = {} - user_details['email_address'] = params.get('email_address', '').encode("utf-8").strip() - if not (5 <= len(user_details['email_address']) <= 50): - errors.append('Email Address needs to be between 5 and 50 characters.') - else: - email_exists = get_user_by_unique_column("email_address", user_details['email_address']) - if email_exists: - errors.append('User already exists with that email') + user_details['email_address'] = params.get( + 'email_address', '').encode("utf-8").strip() + if not (5 <= len(user_details['email_address']) <= 50): + errors.append( + 'Email Address needs to be between 5 and 50 characters.') + else: + email_exists = get_user_by_unique_column( + "email_address", user_details['email_address']) + if email_exists: + errors.append('User already exists with that email') - user_details['full_name'] = params.get('full_name', '').encode("utf-8").strip() - if not (5 <= len(user_details['full_name']) <= 50): - errors.append('Full Name needs to be between 5 and 50 characters.') + user_details['full_name'] = params.get( + 'full_name', '').encode("utf-8").strip() + if not (5 <= len(user_details['full_name']) <= 50): + errors.append('Full Name needs to be between 5 and 50 characters.') - user_details['organization'] = params.get('organization', '').encode("utf-8").strip() - if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): - errors.append('Organization needs to be empty or between 5 and 50 characters.') + user_details['organization'] = params.get( + 'organization', '').encode("utf-8").strip() + if user_details['organization'] and not (5 <= len(user_details['organization']) <= 50): + errors.append( + 'Organization needs to be empty or between 5 and 50 characters.') - password = str(params.get('password', '')) - if not (6 <= len(password)): - errors.append('Password needs to be at least 6 characters.') + password = str(params.get('password', '')) + if not (6 <= len(password)): + errors.append('Password needs to be at least 6 characters.') - if params.get('password_confirm') != password: - errors.append("Passwords don't match.") + if params.get('password_confirm') != password: + errors.append("Passwords don't match.") - user_details['password'] = set_password(password) - user_details['user_id'] = str(uuid.uuid4()) - user_details['confirmed'] = 1 + user_details['password'] = set_password(password) + user_details['user_id'] = str(uuid.uuid4()) + user_details['confirmed'] = 1 - user_details['registration_info'] = basic_info() + user_details['registration_info'] = basic_info() - if len(errors) == 0: - save_user(user_details, user_details['user_id']) + if len(errors) == 0: + save_user(user_details, user_details['user_id']) + + return errors - return errors @app.route("/n/register", methods=('GET', 'POST')) def register(): @@ -473,11 +521,13 @@ def register(): errors = register_user(params) if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") + flash( + "Registration successful. You may login with your new account", "alert-info") return redirect(url_for("login")) return render_template("new_security/register_user.html", values=params, errors=errors) + @app.errorhandler(401) def unauthorized(error): return redirect(url_for('login')) diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py deleted file mode 100644 index 7b25b68e..00000000 --- a/wqflask/wqflask/user_manager.py +++ /dev/null @@ -1,1050 +0,0 @@ -import os -import hashlib -import datetime -import time -import uuid -import hmac -import base64 -import redis # used for collections -import simplejson as json -import requests - -from base.data_set import create_datasets_list - -from flask import g -from flask import render_template -from flask import url_for -from flask import request -from flask import make_response -from flask import redirect -from flask import flash - -from wqflask import app -from wqflask import pbkdf2 # password hashing -from wqflask.database import db_session -from wqflask import model - -from smtplib import SMTP - -from pprint import pformat as pf - -from utility import Bunch -from utility import Struct -from utility.logger import getLogger - -from utility.redis_tools import get_user_id -from utility.redis_tools import get_user_by_unique_column -from utility.redis_tools import set_user_attribute -from utility.redis_tools import save_user -from utility.redis_tools import save_verification_code -from utility.redis_tools import check_verification_code -from utility.redis_tools import get_user_collections -from utility.redis_tools import save_collections - -from utility.tools import SMTP_CONNECT -from utility.tools import SMTP_USERNAME -from utility.tools import SMTP_PASSWORD - - -logger = getLogger(__name__) - - -Redis = redis.StrictRedis() - -THREE_DAYS = 60 * 60 * 24 * 3 - - -def timestamp(): - return datetime.datetime.utcnow().isoformat() - - -class AnonUser(object): - """Anonymous user handling""" - cookie_name = 'anon_user_v1' - - def __init__(self): - self.cookie = request.cookies.get(self.cookie_name) - if self.cookie: - logger.debug("ANON COOKIE ALREADY EXISTS") - self.anon_id = verify_cookie(self.cookie) - else: - logger.debug("CREATING NEW ANON COOKIE") - self.anon_id, self.cookie = create_signed_cookie() - - self.key = "anon_collection:v1:{}".format(self.anon_id) - - def add_collection(self, new_collection): - collection_dict = dict(name = new_collection.name, - created_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - changed_timestamp = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - num_members = new_collection.num_members, - members = new_collection.get_members()) - - Redis.set(self.key, json.dumps(collection_dict)) - Redis.expire(self.key, 60 * 60 * 24 * 365) - - def delete_collection(self, collection_name): - existing_collections = self.get_collections() - updated_collections = [] - for i, collection in enumerate(existing_collections): - if collection['name'] == collection_name: - continue - else: - this_collection = {} - this_collection['id'] = collection['id'] - this_collection['name'] = collection['name'] - this_collection['created_timestamp'] = collection['created_timestamp'].strftime('%b %d %Y %I:%M%p') - this_collection['changed_timestamp'] = collection['changed_timestamp'].strftime('%b %d %Y %I:%M%p') - this_collection['num_members'] = collection['num_members'] - this_collection['members'] = collection['members'] - updated_collections.append(this_collection) - - Redis.set(self.key, json.dumps(updated_collections)) - - def get_collections(self): - json_collections = Redis.get(self.key) - if json_collections == None or json_collections == "None": - return [] - else: - collections = json.loads(json_collections) - for collection in collections: - collection['created_timestamp'] = datetime.datetime.strptime(collection['created_timestamp'], '%b %d %Y %I:%M%p') - collection['changed_timestamp'] = datetime.datetime.strptime(collection['changed_timestamp'], '%b %d %Y %I:%M%p') - - collections = sorted(collections, key = lambda i: i['changed_timestamp'], reverse = True) - return collections - - def import_traits_to_user(self): - result = Redis.get(self.key) - collections_list = json.loads(result if result else "[]") - for collection in collections_list: - collection_exists = g.user_session.get_collection_by_name(collection['name']) - if collection_exists: - continue - else: - g.user_session.add_collection(collection['name'], collection['members']) - - def display_num_collections(self): - """ - Returns the number of collections or a blank string if there are zero. - - Because this is so unimportant...we wrap the whole thing in a try/expect...last thing we - want is a webpage not to be displayed because of an error here - - Importand TODO: use redis to cache this, don't want to be constantly computing it - """ - try: - num = len(self.get_collections()) - if num > 0: - return num - else: - return "" - except Exception as why: - print("Couldn't display_num_collections:", why) - return "" - - -def verify_cookie(cookie): - the_uuid, separator, the_signature = cookie.partition(':') - assert len(the_uuid) == 36, "Is session_id a uuid?" - assert separator == ":", "Expected a : here" - assert the_signature == actual_hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" - return the_uuid - -def create_signed_cookie(): - the_uuid = str(uuid.uuid4()) - signature = actual_hmac_creation(the_uuid) - uuid_signed = the_uuid + ":" + signature - logger.debug("uuid_signed:", uuid_signed) - return the_uuid, uuid_signed - -class UserSession(object): - """Logged in user handling""" - - cookie_name = 'session_id_v1' - - def __init__(self): - cookie = request.cookies.get(self.cookie_name) - if not cookie: - logger.debug("NO USER COOKIE") - self.logged_in = False - return - else: - session_id = verify_cookie(cookie) - - self.redis_key = self.cookie_name + ":" + session_id - logger.debug("self.redis_key is:", self.redis_key) - self.session_id = session_id - self.record = Redis.hgetall(self.redis_key) - - if not self.record: - # This will occur, for example, when the browser has been left open over a long - # weekend and the site hasn't been visited by the user - self.logged_in = False - - ########### Grrr...this won't work because of the way flask handles cookies - # Delete the cookie - #response = make_response(redirect(url_for('login'))) - #response.set_cookie(self.cookie_name, '', expires=0) - #flash( - # "Due to inactivity your session has expired. If you'd like please login again.") - #return response - return - - if Redis.ttl(self.redis_key) < THREE_DAYS: - # (Almost) everytime the user does something we extend the session_id in Redis... - logger.debug("Extending ttl...") - Redis.expire(self.redis_key, THREE_DAYS) - - logger.debug("record is:", self.record) - self.logged_in = True - - @property - def user_id(self): - """Shortcut to the user_id""" - if 'user_id' in self.record: - return self.record['user_id'] - else: - return '' - - @property - def redis_user_id(self): - """User id from ElasticSearch (need to check if this is the same as the id stored in self.records)""" - - user_email = self.record['user_email_address'] - - #ZS: Get user's collections if they exist - user_id = None - user_id = get_user_id("email_address", user_email) - return user_id - - @property - def user_name(self): - """Shortcut to the user_name""" - if 'user_name' in self.record: - return self.record['user_name'] - else: - return '' - - @property - def user_collections(self): - """List of user's collections""" - - #ZS: Get user's collections if they exist - collections = get_user_collections(self.redis_user_id) - return collections - - @property - def num_collections(self): - """Number of user's collections""" - - return len(self.user_collections) - - def add_collection(self, collection_name, traits): - """Add collection into ElasticSearch""" - - collection_dict = {'id': str(uuid.uuid4()), - 'name': collection_name, - 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), - 'num_members': len(traits), - 'members': list(traits) } - - current_collections = self.user_collections - current_collections.append(collection_dict) - self.update_collections(current_collections) - - return collection_dict['id'] - - def delete_collection(self, collection_id): - """Remove collection with given ID""" - - updated_collections = [] - for collection in self.user_collections: - if collection['id'] == collection_id: - continue - else: - updated_collections.append(collection) - - self.update_collections(updated_collections) - - return collection['name'] - - def add_traits_to_collection(self, collection_id, traits_to_add): - """Add specified traits to a collection""" - - this_collection = self.get_collection_by_id(collection_id) - - updated_collection = this_collection - updated_traits = this_collection['members'] + traits_to_add - - updated_collection['members'] = updated_traits - updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') - - updated_collections = [] - for collection in self.user_collections: - if collection['id'] == collection_id: - updated_collections.append(updated_collection) - else: - updated_collections.append(collection) - - self.update_collections(updated_collections) - - def remove_traits_from_collection(self, collection_id, traits_to_remove): - """Remove specified traits from a collection""" - - this_collection = self.get_collection_by_id(collection_id) - - updated_collection = this_collection - updated_traits = [] - for trait in this_collection['members']: - if trait in traits_to_remove: - continue - else: - updated_traits.append(trait) - - updated_collection['members'] = updated_traits - updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') - - updated_collections = [] - for collection in self.user_collections: - if collection['id'] == collection_id: - updated_collections.append(updated_collection) - else: - updated_collections.append(collection) - - self.update_collections(updated_collections) - - return updated_traits - - def get_collection_by_id(self, collection_id): - for collection in self.user_collections: - if collection['id'] == collection_id: - return collection - - def get_collection_by_name(self, collection_name): - for collection in self.user_collections: - if collection['name'] == collection_name: - return collection - - return None - - def update_collections(self, updated_collections): - collection_body = json.dumps(updated_collections) - - save_collections(self.redis_user_id, collection_body) - - def delete_session(self): - # And more importantly delete the redis record - Redis.delete(self.cookie_name) - logger.debug("At end of delete_session") - -@app.before_request -def get_cookie(): - logger.info("@app.before_request get cookie") - g.user_session = UserSession() - g.cookie_session = AnonUser() - -#@app.after_request -def set_cookie(response): - if not request.cookies.get(g.cookie_session.cookie_name): - response.set_cookie(g.cookie_session.cookie_name, g.cookie_session.cookie) - return response - -class UsersManager(object): - def __init__(self): - self.users = model.User.query.all() - logger.debug("Users are:", self.users) - -class UserManager(object): - def __init__(self, kw): - self.user_id = kw['user_id'] - logger.debug("In UserManager locals are:", pf(locals())) - #self.user = model.User.get(user_id) - #logger.debug("user is:", user) - self.user = model.User.query.get(self.user_id) - logger.debug("user is:", self.user) - datasets = create_datasets_list() - for dataset in datasets: - if not dataset.check_confidentiality(): - continue - logger.debug("\n Name:", dataset.name) - logger.debug(" Type:", dataset.type) - logger.debug(" ID:", dataset.id) - logger.debug(" Confidential:", dataset.check_confidentiality()) - #logger.debug(" ---> self.datasets:", self.datasets) - - -class RegisterUser(object): - def __init__(self, kw): - self.thank_you_mode = False - self.errors = [] - self.user = Bunch() - - self.user.email_address = kw.get('email_address', '').encode("utf-8").strip() - if not (5 <= len(self.user.email_address) <= 50): - self.errors.append('Email Address needs to be between 5 and 50 characters.') - else: - email_exists = get_user_by_unique_column("email_address", self.user.email_address) - #email_exists = get_user_by_unique_column(es, "email_address", self.user.email_address) - if email_exists: - self.errors.append('User already exists with that email') - - self.user.full_name = kw.get('full_name', '').encode("utf-8").strip() - if not (5 <= len(self.user.full_name) <= 50): - self.errors.append('Full Name needs to be between 5 and 50 characters.') - - self.user.organization = kw.get('organization', '').encode("utf-8").strip() - if self.user.organization and not (5 <= len(self.user.organization) <= 50): - self.errors.append('Organization needs to be empty or between 5 and 50 characters.') - - password = str(kw.get('password', '')) - if not (6 <= len(password)): - self.errors.append('Password needs to be at least 6 characters.') - - if kw.get('password_confirm') != password: - self.errors.append("Passwords don't match.") - - if self.errors: - return - - logger.debug("No errors!") - - set_password(password, self.user) - self.user.user_id = str(uuid.uuid4()) - self.user.confirmed = 1 - - self.user.registration_info = json.dumps(basic_info(), sort_keys=True) - save_user(self.user.__dict__, self.user.user_id) - -def set_password(password, user): - pwfields = Bunch() - - pwfields.algorithm = "pbkdf2" - pwfields.hashfunc = "sha256" - #hashfunc = getattr(hashlib, pwfields.hashfunc) - - # Encoding it to base64 makes storing it in json much easier - pwfields.salt = base64.b64encode(os.urandom(32)) - - # https://forums.lastpass.com/viewtopic.php?t=84104 - pwfields.iterations = 100000 - pwfields.keylength = 32 - - pwfields.created_ts = timestamp() - # One more check on password length - assert len(password) >= 6, "Password shouldn't be so short here" - - logger.debug("pwfields:", vars(pwfields)) - logger.debug("locals:", locals()) - - enc_password = Password(password, - pwfields.salt, - pwfields.iterations, - pwfields.keylength, - pwfields.hashfunc) - - pwfields.password = enc_password.password - pwfields.encrypt_time = enc_password.encrypt_time - - user.password = json.dumps(pwfields.__dict__, - sort_keys=True, - ) - - -class VerificationEmail(object): - template_name = "email/verification.txt" - key_prefix = "verification_code" - subject = "GeneNetwork email verification" - - def __init__(self, user): - verification_code = str(uuid.uuid4()) - key = self.key_prefix + ":" + verification_code - - data = json.dumps(dict(id=user.user_id, - timestamp=timestamp()) - ) - - Redis.set(key, data) - #two_days = 60 * 60 * 24 * 2 - Redis.expire(key, THREE_DAYS) - to = user.email_address - subject = self.subject - body = render_template(self.template_name, - verification_code = verification_code) - send_email(to, subject, body) - -class ForgotPasswordEmail(VerificationEmail): - template_name = "email/forgot_password.txt" - key_prefix = "forgot_password_code" - subject = "GeneNetwork password reset" - fromaddr = "no-reply@genenetwork.org" - - def __init__(self, toaddr): - from email.MIMEMultipart import MIMEMultipart - from email.MIMEText import MIMEText - verification_code = str(uuid.uuid4()) - key = self.key_prefix + ":" + verification_code - - data = { - "verification_code": verification_code, - "email_address": toaddr, - "timestamp": timestamp() - } - - save_verification_code(toaddr, verification_code) - - - subject = self.subject - body = render_template( - self.template_name, - verification_code = verification_code) - - msg = MIMEMultipart() - msg["To"] = toaddr - msg["Subject"] = self.subject - msg["From"] = self.fromaddr - msg.attach(MIMEText(body, "plain")) - - send_email(toaddr, msg.as_string()) - - -class Password(object): - def __init__(self, unencrypted_password, salt, iterations, keylength, hashfunc): - hashfunc = getattr(hashlib, hashfunc) - logger.debug("hashfunc is:", hashfunc) - # On our computer it takes around 1.4 seconds in 2013 - start_time = time.time() - salt = base64.b64decode(salt) - self.password = pbkdf2.pbkdf2_hex(str(unencrypted_password), - salt, iterations, keylength, hashfunc) - self.encrypt_time = round(time.time() - start_time, 3) - logger.debug("Creating password took:", self.encrypt_time) - - -def basic_info(): - return dict(timestamp = timestamp(), - ip_address = request.remote_addr, - user_agent = request.headers.get('User-Agent')) - -#@app.route("/manage/verify_email") -def verify_email(): - user = DecodeUser(VerificationEmail.key_prefix).user - user.confirmed = json.dumps(basic_info(), sort_keys=True) - db_session.commit() - - # As long as they have access to the email account - # We might as well log them in - - session_id_signed = LoginUser().successful_login(user) - response = make_response(render_template("new_security/thank_you.html")) - response.set_cookie(UserSession.cookie_name, session_id_signed) - return response - -#@app.route("/n/password_reset", methods=['GET']) -def password_reset(): - """Entry point after user clicks link in E-mail""" - logger.debug("in password_reset request.url is:", request.url) - # We do this mainly just to assert that it's in proper form for displaying next page - # Really not necessary but doesn't hurt - # user_encode = DecodeUser(ForgotPasswordEmail.key_prefix).reencode_standalone() - verification_code = request.args.get('code') - hmac = request.args.get('hm') - - if verification_code: - user_email = check_verification_code(verification_code) - if user_email: - user_details = get_user_by_unique_column('email_address', user_email) - if user_details: - return render_template( - "new_security/password_reset.html", user_encode=user_details["user_id"]) - else: - flash("Invalid code: User no longer exists!", "error") - else: - flash("Invalid code: Password reset code does not exist or might have expired!", "error") - else: - return redirect(url_for("login")) - -#@app.route("/n/password_reset_step2", methods=('POST',)) -def password_reset_step2(): - """Handle confirmation E-mail for password reset""" - logger.debug("in password_reset request.url is:", request.url) - - errors = [] - user_id = request.form['user_encode'] - - logger.debug("locals are:", locals()) - - - user = Bunch() - password = request.form['password'] - set_password(password, user) - - set_user_attribute(user_id, "password", user.__dict__.get("password")) - - flash("Password changed successfully. You can now sign in.", "alert-info") - response = make_response(redirect(url_for('login'))) - - return response - -class DecodeUser(object): - - def __init__(self, code_prefix): - verify_url_hmac(request.url) - - #params = urlparse.parse_qs(url) - - self.verification_code = request.args['code'] - self.user = self.actual_get_user(code_prefix, self.verification_code) - - def reencode_standalone(self): - hmac = actual_hmac_creation(self.verification_code) - return self.verification_code + ":" + hmac - - @staticmethod - def actual_get_user(code_prefix, verification_code): - data = Redis.get(code_prefix + ":" + verification_code) - logger.debug("in get_coded_user, data is:", data) - data = json.loads(data) - logger.debug("data is:", data) - return model.User.query.get(data['id']) - -#@app.route("/n/login", methods=('GET', 'POST')) -def login(): - lu = LoginUser() - login_type = request.args.get("type") - if login_type: - uid = request.args.get("uid") - return lu.oauth2_login(login_type, uid) - else: - return lu.standard_login() - -#@app.route("/n/login/github_oauth2", methods=('GET', 'POST')) -def github_oauth2(): - from utility.tools import GITHUB_CLIENT_ID, GITHUB_CLIENT_SECRET - code = request.args.get("code") - data = { - "client_id": GITHUB_CLIENT_ID, - "client_secret": GITHUB_CLIENT_SECRET, - "code": code - } - result = requests.post("https://github.com/login/oauth/access_token", json=data) - result_dict = {arr[0]:arr[1] for arr in [tok.split("=") for tok in [token.encode("utf-8") for token in result.text.split("&")]]} - - github_user = get_github_user_details(result_dict["access_token"]) - - user_details = get_user_by_unique_column("github_id", github_user["id"]) - if user_details == None: - user_details = { - "user_id": str(uuid.uuid4()) - , "name": github_user["name"].encode("utf-8") - , "github_id": github_user["id"] - , "user_url": github_user["html_url"].encode("utf-8") - , "login_type": "github" - , "organization": "" - , "active": 1 - , "confirmed": 1 - } - save_user(user_details, user_details["user_id"]) - - url = "/n/login?type=github&uid="+user_details["user_id"] - return redirect(url) - -#@app.route("/n/login/orcid_oauth2", methods=('GET', 'POST')) -def orcid_oauth2(): - from uuid import uuid4 - from utility.tools import ORCID_CLIENT_ID, ORCID_CLIENT_SECRET, ORCID_TOKEN_URL, ORCID_AUTH_URL - code = request.args.get("code") - error = request.args.get("error") - url = "/n/login" - if code: - data = { - "client_id": ORCID_CLIENT_ID - , "client_secret": ORCID_CLIENT_SECRET - , "grant_type": "authorization_code" - , "code": code - } - result = requests.post(ORCID_TOKEN_URL, data=data) - result_dict = json.loads(result.text.encode("utf-8")) - - user_details = get_user_by_unique_column("orcid", result_dict["orcid"]) - if user_details == None: - user_details = { - "user_id": str(uuid4()) - , "name": result_dict["name"] - , "orcid": result_dict["orcid"] - , "user_url": "%s/%s" % ( - "/".join(ORCID_AUTH_URL.split("/")[:-2]), - result_dict["orcid"]) - , "login_type": "orcid" - , "organization": "" - , "active": 1 - , "confirmed": 1 - } - save_user(user_details, user_details["user_id"]) - - url = "/n/login?type=orcid&uid="+user_details["user_id"] - else: - flash("There was an error getting code from ORCID") - return redirect(url) - -def get_github_user_details(access_token): - from utility.tools import GITHUB_API_URL - result = requests.get(GITHUB_API_URL, params={"access_token":access_token}) - return result.json() - -class LoginUser(object): - remember_time = 60 * 60 * 24 * 30 # One month in seconds - - def __init__(self): - self.remember_me = False - self.logged_in = False - - def oauth2_login(self, login_type, user_id): - """Login via an OAuth2 provider""" - - user_details = get_user_by_unique_column("user_id", user_id) - if user_details: - user = model.User() - user.id = user_details["user_id"] if user_details["user_id"] == None else "N/A" - user.full_name = user_details["name"] - user.login_type = user_details["login_type"] - return self.actual_login(user) - else: - flash("Error logging in via OAuth2") - return make_response(redirect(url_for('login'))) - - def standard_login(self): - """Login through the normal form""" - params = request.form if request.form else request.args - logger.debug("in login params are:", params) - - if not params: - from utility.tools import GITHUB_AUTH_URL, GITHUB_CLIENT_ID, ORCID_AUTH_URL, ORCID_CLIENT_ID - external_login = {} - if GITHUB_AUTH_URL and GITHUB_CLIENT_ID != 'UNKNOWN': - external_login["github"] = GITHUB_AUTH_URL - if ORCID_AUTH_URL and ORCID_CLIENT_ID != 'UNKNOWN': - external_login["orcid"] = ORCID_AUTH_URL - - return render_template( - "new_security/login_user.html" - , external_login=external_login - , redis_is_available = is_redis_available()) - else: - user_details = get_user_by_unique_column("email_address", params["email_address"]) - #user_details = get_user_by_unique_column(es, "email_address", params["email_address"]) - user = None - valid = None - if user_details: - user = model.User(); - for key in user_details: - user.__dict__[key] = user_details[key] - valid = False; - - submitted_password = params['password'] - pwfields = Struct(json.loads(user.password)) - encrypted = Password( - submitted_password, - pwfields.salt, - pwfields.iterations, - pwfields.keylength, - pwfields.hashfunc) - logger.debug("\n\nComparing:\n{}\n{}\n".format(encrypted.password, pwfields.password)) - valid = pbkdf2.safe_str_cmp(encrypted.password, pwfields.password) - logger.debug("valid is:", valid) - - if valid and not user.confirmed: - VerificationEmail(user) - return render_template("new_security/verification_still_needed.html", - subject=VerificationEmail.subject) - if valid: - if params.get('remember'): - logger.debug("I will remember you") - self.remember_me = True - - if 'import_collections' in params: - import_col = "true" - else: - import_col = "false" - - #g.cookie_session.import_traits_to_user() - - self.logged_in = True - - return self.actual_login(user, import_collections=import_col) - - else: - if user: - self.unsuccessful_login(user) - flash("Invalid email-address or password. Please try again.", "alert-danger") - response = make_response(redirect(url_for('login'))) - - return response - - def actual_login(self, user, assumed_by=None, import_collections=None): - """The meat of the logging in process""" - session_id_signed = self.successful_login(user, assumed_by) - flash("Thank you for logging in {}.".format(user.full_name), "alert-success") - response = make_response(redirect(url_for('index_page', import_collections=import_collections))) - if self.remember_me: - max_age = self.remember_time - else: - max_age = None - - response.set_cookie(UserSession.cookie_name, session_id_signed, max_age=max_age) - return response - - def successful_login(self, user, assumed_by=None): - login_rec = model.Login(user) - login_rec.successful = True - login_rec.session_id = str(uuid.uuid4()) - login_rec.assumed_by = assumed_by - #session_id = "session_id:{}".format(login_rec.session_id) - session_id_signature = actual_hmac_creation(login_rec.session_id) - session_id_signed = login_rec.session_id + ":" + session_id_signature - logger.debug("session_id_signed:", session_id_signed) - - if not user.id: - user.id = '' - - session = dict(login_time = time.time(), - user_id = user.id, - user_name = user.full_name, - user_email_address = user.email_address) - - key = UserSession.cookie_name + ":" + login_rec.session_id - logger.debug("Key when signing:", key) - Redis.hmset(key, session) - if self.remember_me: - expire_time = self.remember_time - else: - expire_time = THREE_DAYS - Redis.expire(key, expire_time) - - return session_id_signed - - def unsuccessful_login(self, user): - login_rec = model.Login(user) - login_rec.successful = False - db_session.add(login_rec) - db_session.commit() - -#@app.route("/n/logout") -def logout(): - logger.debug("Logging out...") - UserSession().delete_session() - flash("You are now logged out. We hope you come back soon!") - response = make_response(redirect(url_for('index_page'))) - # Delete the cookie - response.set_cookie(UserSession.cookie_name, '', expires=0) - return response - - -#@app.route("/n/forgot_password", methods=['GET']) -def forgot_password(): - """Entry point for forgotten password""" - print("ARGS: ", request.args) - errors = {"no-email": request.args.get("no-email")} - print("ERRORS: ", errors) - return render_template("new_security/forgot_password.html", errors=errors) - -#@app.route("/n/forgot_password_submit", methods=('POST',)) -def forgot_password_submit(): - """When a forgotten password form is submitted we get here""" - params = request.form - email_address = params['email_address'] - next_page = None - if email_address != "": - logger.debug("Wants to send password E-mail to ", email_address) - user_details = get_user_by_unique_column("email_address", email_address) - if user_details: - ForgotPasswordEmail(user_details["email_address"]) - return render_template("new_security/forgot_password_step2.html", - subject=ForgotPasswordEmail.subject) - else: - flash("The e-mail entered is not associated with an account.", "alert-danger") - return redirect(url_for("forgot_password")) - - else: - flash("You MUST provide an email", "alert-danger") - return redirect(url_for("forgot_password")) - -@app.errorhandler(401) -def unauthorized(error): - return redirect(url_for('login')) - -def is_redis_available(): - try: - Redis.ping() - except: - return False - return True - -### -# ZS: The following 6 functions require the old MySQL User accounts; I'm leaving them commented out just in case we decide to reimplement them using ElasticSearch -### -#def super_only(): -# try: -# superuser = g.user_session.user_ob.superuser -# except AttributeError: -# superuser = False -# if not superuser: -# flash("You must be a superuser to access that page.", "alert-error") -# abort(401) - -#@app.route("/manage/users") -#def manage_users(): -# super_only() -# template_vars = UsersManager() -# return render_template("admin/user_manager.html", **template_vars.__dict__) - -#@app.route("/manage/user") -#def manage_user(): -# super_only() -# template_vars = UserManager(request.args) -# return render_template("admin/ind_user_manager.html", **template_vars.__dict__) - -#@app.route("/manage/groups") -#def manage_groups(): -# super_only() -# template_vars = GroupsManager(request.args) -# return render_template("admin/group_manager.html", **template_vars.__dict__) - -#@app.route("/manage/make_superuser") -#def make_superuser(): -# super_only() -# params = request.args -# user_id = params['user_id'] -# user = model.User.query.get(user_id) -# superuser_info = basic_info() -# superuser_info['crowned_by'] = g.user_session.user_id -# user.superuser = json.dumps(superuser_info, sort_keys=True) -# db_session.commit() -# flash("We've made {} a superuser!".format(user.name_and_org)) -# return redirect(url_for("manage_users")) - -#@app.route("/manage/assume_identity") -#def assume_identity(): -# super_only() -# params = request.args -# user_id = params['user_id'] -# user = model.User.query.get(user_id) -# assumed_by = g.user_session.user_id -# return LoginUser().actual_login(user, assumed_by=assumed_by) - - -#@app.route("/n/register", methods=('GET', 'POST')) -def register(): - params = None - errors = None - - - params = request.form if request.form else request.args - params = params.to_dict(flat=True) - - if params: - logger.debug("Attempting to register the user...") - result = RegisterUser(params) - errors = result.errors - - if len(errors) == 0: - flash("Registration successful. You may login with your new account", "alert-info") - return redirect(url_for("login")) - - return render_template("new_security/register_user.html", values=params, errors=errors) - - -################################# Sign and unsign ##################################### - -def url_for_hmac(endpoint, **values): - """Like url_for but adds an hmac at the end to insure the url hasn't been tampered with""" - - url = url_for(endpoint, **values) - - hm = actual_hmac_creation(url) - if '?' in url: - combiner = "&" - else: - combiner = "?" - return url + combiner + "hm=" + hm - -def data_hmac(stringy): - """Takes arbitray data string and appends :hmac so we know data hasn't been tampered with""" - return stringy + ":" + actual_hmac_creation(stringy) - - -def verify_url_hmac(url): - """Pass in a url that was created with url_hmac and this assures it hasn't been tampered with""" - logger.debug("url passed in to verify is:", url) - # Verify parts are correct at the end - we expect to see &hm= or ?hm= followed by an hmac - assert url[-23:-20] == "hm=", "Unexpected url (stage 1)" - assert url[-24] in ["?", "&"], "Unexpected url (stage 2)" - hmac = url[-20:] - url = url[:-24] # Url without any of the hmac stuff - - #logger.debug("before urlsplit, url is:", url) - #url = divide_up_url(url)[1] - #logger.debug("after urlsplit, url is:", url) - - hm = actual_hmac_creation(url) - - assert hm == hmac, "Unexpected url (stage 3)" - -def actual_hmac_creation(stringy): - """Helper function to create the actual hmac""" - - secret = app.config['SECRET_HMAC_CODE'] - - hmaced = hmac.new(secret, stringy, hashlib.sha1) - hm = hmaced.hexdigest() - # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output." - # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html - hm = hm[:20] - return hm - -app.jinja_env.globals.update(url_for_hmac=url_for_hmac, - data_hmac=data_hmac) - -####################################################################################### - -# def send_email(to, subject, body): -# msg = json.dumps(dict(From="no-reply@genenetwork.org", -# To=to, -# Subject=subject, -# Body=body)) -# Redis.rpush("mail_queue", msg) - -def send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): - """Send an E-mail through SMTP_CONNECT host. If SMTP_USERNAME is not - 'UNKNOWN' TLS is used - - """ - if SMTP_USERNAME == 'UNKNOWN': - logger.debug("SMTP: connecting with host "+SMTP_CONNECT) - server = SMTP(SMTP_CONNECT) - server.sendmail(fromaddr, toaddr, msg) - else: - logger.debug("SMTP: connecting TLS with host "+SMTP_CONNECT) - server = SMTP(SMTP_CONNECT) - server.starttls() - logger.debug("SMTP: login with user "+SMTP_USERNAME) - server.login(SMTP_USERNAME, SMTP_PASSWORD) - logger.debug("SMTP: "+fromaddr) - logger.debug("SMTP: "+toaddr) - logger.debug("SMTP: "+msg) - server.sendmail(fromaddr, toaddr, msg) - server.quit() - logger.info("Successfully sent email to "+toaddr) - -class GroupsManager(object): - def __init__(self, kw): - self.datasets = create_datasets_list() - - -class RolesManager(object): - def __init__(self): - self.roles = model.Role.query.all() - logger.debug("Roles are:", self.roles) diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index c5a577df..67e2e158 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -20,30 +20,36 @@ logger = getLogger(__name__) THREE_DAYS = 60 * 60 * 24 * 3 THIRTY_DAYS = 60 * 60 * 24 * 30 + @app.before_request def get_user_session(): logger.info("@app.before_request get_session") g.user_session = UserSession() - #ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired + # ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired if not g.user_session: response = make_response(redirect(url_for('login'))) response.set_cookie('session_id_v2', '', expires=0) return response + @app.after_request def set_user_session(response): if hasattr(g, 'user_session'): if not request.cookies.get(g.user_session.cookie_name): - response.set_cookie(g.user_session.cookie_name, g.user_session.cookie) + response.set_cookie(g.user_session.cookie_name, + g.user_session.cookie) return response + def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" assert separator == ":", "Expected a : here" - assert the_signature == hmac.hmac_creation(the_uuid), "Uh-oh, someone tampering with the cookie?" + assert the_signature == hmac.hmac_creation( + the_uuid), "Uh-oh, someone tampering with the cookie?" return the_uuid + def create_signed_cookie(): the_uuid = str(uuid.uuid4()) signature = hmac.hmac_creation(the_uuid) @@ -51,19 +57,23 @@ def create_signed_cookie(): logger.debug("uuid_signed:", uuid_signed) return the_uuid, uuid_signed -@app.route("/user/manage", methods=('GET','POST')) + +@app.route("/user/manage", methods=('GET', 'POST')) def manage_user(): params = request.form if request.form else request.args if 'new_full_name' in params: - set_user_attribute(g.user_session.user_id, 'full_name', params['new_full_name']) + set_user_attribute(g.user_session.user_id, + 'full_name', params['new_full_name']) if 'new_organization' in params: - set_user_attribute(g.user_session.user_id, 'organization', params['new_organization']) + set_user_attribute(g.user_session.user_id, + 'organization', params['new_organization']) user_details = get_user_by_unique_column("user_id", g.user_session.user_id) - return render_template("admin/manage_user.html", user_details = user_details) + return render_template("admin/manage_user.html", user_details=user_details) + -class UserSession(object): +class UserSession: """Logged in user handling""" user_cookie_name = 'session_id_v2' @@ -89,25 +99,26 @@ class UserSession(object): self.session_id = session_id self.record = Redis.hgetall(self.redis_key) - #ZS: If user correctled logged in but their session expired - #ZS: Need to test this by setting the time-out to be really short or something + # ZS: If user correctled logged in but their session expired + # ZS: Need to test this by setting the time-out to be really short or something if not self.record or self.record == []: if user_cookie: self.logged_in = False - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) - ########### Grrr...this won't work because of the way flask handles cookies + # Grrr...this won't work because of the way flask handles cookies # Delete the cookie - flash("Due to inactivity your session has expired. If you'd like please login again.") + flash( + "Due to inactivity your session has expired. If you'd like please login again.") return None else: - self.record = dict(login_time = time.time(), - user_type = "anon", - user_id = str(uuid.uuid4())) + self.record = dict(login_time=time.time(), + user_type="anon", + user_id=str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) else: @@ -138,13 +149,13 @@ class UserSession(object): def redis_user_id(self): """User id from Redis (need to check if this is the same as the id stored in self.records)""" - #ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. - #ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections + # ZS: This part is a bit weird. Some accounts used to not have saved user ids, and in the process of testing I think I created some duplicate accounts for myself. + # ZS: Accounts should automatically generate user_ids if they don't already have one now, so this might not be necessary for anything other than my account's collections if 'user_email_address' in self.record: user_email = self.record['user_email_address'] - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist user_id = None user_id = get_user_id("email_address", user_email) elif 'user_id' in self.record: @@ -153,7 +164,7 @@ class UserSession(object): user_github_id = self.record['github_id'] user_id = None user_id = get_user_id("github_id", user_github_id) - else: #ZS: Anonymous user + else: # ZS: Anonymous user return None return user_id @@ -170,9 +181,11 @@ class UserSession(object): def user_collections(self): """List of user's collections""" - #ZS: Get user's collections if they exist + # ZS: Get user's collections if they exist collections = get_user_collections(self.user_id) - collections = [item for item in collections if item['name'] != "Your Default Collection"] + [item for item in collections if item['name'] == "Your Default Collection"] #ZS: Ensure Default Collection is last in list + collections = [item for item in collections if item['name'] != "Your Default Collection"] + \ + [item for item in collections if item['name'] + == "Your Default Collection"] # ZS: Ensure Default Collection is last in list return collections @property @@ -189,7 +202,7 @@ class UserSession(object): 'created_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'changed_timestamp': datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), 'num_members': len(traits), - 'members': list(traits) } + 'members': list(traits)} current_collections = self.user_collections current_collections.append(collection_dict) @@ -228,12 +241,14 @@ class UserSession(object): this_collection = self.get_collection_by_id(collection_id) updated_collection = this_collection - current_members_minus_new = [member for member in this_collection['members'] if member not in traits_to_add] + current_members_minus_new = [ + member for member in this_collection['members'] if member not in traits_to_add] updated_traits = traits_to_add + current_members_minus_new updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -259,7 +274,8 @@ class UserSession(object): updated_collection['members'] = updated_traits updated_collection['num_members'] = len(updated_traits) - updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') + updated_collection['changed_timestamp'] = datetime.datetime.utcnow().strftime( + '%b %d %Y %I:%M%p') updated_collections = [] for collection in self.user_collections: @@ -302,5 +318,3 @@ class UserSession(object): # And more importantly delete the redis record Redis.delete(self.redis_key) self.logged_in = False - - diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 2c0ba586..0714bd20 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -1,51 +1,72 @@ """Main routing table for GN2""" -import traceback # for error page -import os # for error gifs -import random # for random error gif -import datetime # for errors -import time # for errors -import sys +import MySQLdb +import array +import base64 import csv -import simplejson as json -import yaml -import xlsxwriter +import difflib +import datetime +import flask import io # Todo: Use cStringIO? -from zipfile import ZipFile, ZIP_DEFLATED - -import gc +import json import numpy as np +import os import pickle as pickle +import random +import sqlalchemy +import sys +import traceback import uuid +import xlsxwriter + +from itertools import groupby +from collections import namedtuple +from zipfile import ZipFile +from zipfile import ZIP_DEFLATED -import flask -import base64 -import array -import sqlalchemy from wqflask import app -from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect, url_for, send_file -from wqflask import group_manager -from wqflask import resource_manager +from gn3.db import diff_from_dict +from gn3.db import fetchall +from gn3.db import fetchone +from gn3.db import insert +from gn3.db import update +from gn3.db.metadata_audit import MetadataAudit +from gn3.db.phenotypes import Phenotype +from gn3.db.phenotypes import Publication +from gn3.db.phenotypes import PublishXRef + + +from flask import current_app +from flask import g +from flask import Response +from flask import request +from flask import make_response +from flask import render_template +from flask import send_from_directory +from flask import redirect +from flask import url_for +from flask import send_file + +# Some of these (like collect) might contain endpoints, so they're still used. +# Blueprints should probably be used instead. +from wqflask import collect from wqflask import search_results -from wqflask import export_traits -from wqflask import gsearch -from wqflask import update_search_results -from wqflask import docs -from wqflask import news from wqflask import server_side -from wqflask.submit_bnw import get_bnw_input -from base.data_set import create_dataset, DataSet # Used by YAML in marker_regression +from base.data_set import create_dataset # Used by YAML in marker_regression from wqflask.show_trait import show_trait from wqflask.show_trait import export_trait_data from wqflask.heatmap import heatmap -from wqflask.external_tools import send_to_bnw, send_to_webgestalt, send_to_geneweaver +from wqflask.external_tools import send_to_bnw +from wqflask.external_tools import send_to_webgestalt +from wqflask.external_tools import send_to_geneweaver from wqflask.comparison_bar_chart import comparison_bar_chart from wqflask.marker_regression import run_mapping from wqflask.marker_regression import display_mapping_results from wqflask.network_graph import network_graph from wqflask.correlation import show_corr_results +from wqflask.correlation.correlation_gn3_api import compute_correlation from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis @@ -56,27 +77,35 @@ from wqflask.export_traits import export_search_results_csv from wqflask.gsearch import GSearch from wqflask.update_search_results import GSearch as UpdateGSearch from wqflask.docs import Docs, update_text +from wqflask.decorators import admin_login_required from wqflask.db_info import InfoPage from utility import temp_data -from utility.tools import SQL_URI, TEMPDIR, USE_REDIS, USE_GN_SERVER, GN_SERVER_URL, GN_VERSION, JS_TWITTER_POST_FETCHER_PATH, JS_GUIX_PATH, CSS_PATH +from utility.tools import SQL_URI +from utility.tools import TEMPDIR +from utility.tools import USE_REDIS +from utility.tools import GN_SERVER_URL +from utility.tools import GN_VERSION +from utility.tools import JS_TWITTER_POST_FETCHER_PATH +from utility.tools import JS_GUIX_PATH from utility.helper_functions import get_species_groups from utility.authentication_tools import check_resource_availability from utility.redis_tools import get_redis_conn -Redis = get_redis_conn() + from base.webqtlConfig import GENERATED_IMAGE_DIR, DEFAULT_PRIVILEGES from utility.benchmark import Bench from pprint import pformat as pf -from wqflask import collect from wqflask.database import db_session -import werkzeug import utility.logger -logger = utility.logger.getLogger(__name__ ) + +Redis = get_redis_conn() + +logger = utility.logger.getLogger(__name__) @app.before_request @@ -84,13 +113,14 @@ def connect_db(): logger.info("@app.before_request connect_db") db = getattr(g, '_database', None) if db is None: - g.db = g._database = sqlalchemy.create_engine(SQL_URI, encoding="latin1") + g.db = g._database = sqlalchemy.create_engine( + SQL_URI, encoding="latin1") logger.debug(g.db) + @app.before_request def check_access_permissions(): logger.debug("@app.before_request check_access_permissions") - available = True if 'dataset' in request.args: permissions = DEFAULT_PRIVILEGES if request.args['dataset'] != "Temp": @@ -99,7 +129,8 @@ def check_access_permissions(): if dataset.type == "Temp": permissions = DEFAULT_PRIVILEGES elif 'trait_id' in request.args: - permissions = check_resource_availability(dataset, request.args['trait_id']) + permissions = check_resource_availability( + dataset, request.args['trait_id']) elif dataset.type != "Publish": permissions = check_resource_availability(dataset) @@ -110,6 +141,7 @@ def check_access_permissions(): if permissions['data'] == 'no-access': return redirect(url_for("no_access_page")) + @app.teardown_appcontext def shutdown_session(exception=None): db = getattr(g, '_database', None) @@ -118,6 +150,7 @@ def shutdown_session(exception=None): db_session.remove() g.db = None + @app.errorhandler(Exception) def handle_bad_request(e): err_msg = str(e) @@ -128,25 +161,30 @@ def handle_bad_request(e): logger.error(traceback.format_exc()) now = datetime.datetime.utcnow() time_str = now.strftime('%l:%M%p UTC %b %d, %Y') - formatted_lines = [request.url + " ("+time_str+")"]+traceback.format_exc().splitlines() + formatted_lines = [request.url + + " (" + time_str + ")"] + traceback.format_exc().splitlines() # Handle random animations # Use a cookie to have one animation on refresh animation = request.cookies.get(err_msg[:32]) if not animation: - list = [fn for fn in os.listdir("./wqflask/static/gif/error") if fn.endswith(".gif") ] + list = [fn for fn in os.listdir( + "./wqflask/static/gif/error") if fn.endswith(".gif")] animation = random.choice(list) - resp = make_response(render_template("error.html", message=err_msg, stack=formatted_lines, error_image=animation, version=GN_VERSION)) + resp = make_response(render_template("error.html", message=err_msg, + stack=formatted_lines, error_image=animation, version=GN_VERSION)) # logger.error("Set cookie %s with %s" % (err_msg, animation)) resp.set_cookie(err_msg[:32], animation) return resp + @app.route("/authentication_needed") def no_access_page(): return render_template("new_security/not_authenticated.html") + @app.route("/") def index_page(): logger.info("Sending index_page") @@ -156,13 +194,7 @@ def index_page(): import_collections = params['import_collections'] if import_collections == "true": g.user_session.import_traits_to_user(params['anon_id']) - #if USE_GN_SERVER: - # # The menu is generated using GN_SERVER - # return render_template("index_page.html", gn_server_url = GN_SERVER_URL, version=GN_VERSION) - #else: - - # Old style static menu (OBSOLETE) - return render_template("index_page_orig.html", version=GN_VERSION) + return render_template("index_page.html", version=GN_VERSION) @app.route("/tmp/<img_path>") @@ -177,7 +209,7 @@ def tmp_page(img_path): imgB64 = base64.b64encode(imgdata) bytesarray = array.array('B', imgB64) return render_template("show_image.html", - img_base64 = bytesarray ) + img_base64=bytesarray) @app.route("/js/<path:filename>") @@ -189,6 +221,7 @@ def js(filename): name = name.replace('js_alt/', '') return send_from_directory(js_path, name) + @app.route("/css/<path:filename>") def css(filename): js_path = JS_GUIX_PATH @@ -198,10 +231,12 @@ def css(filename): name = name.replace('js_alt/', '') return send_from_directory(js_path, name) + @app.route("/twitter/<path:filename>") def twitter(filename): return send_from_directory(JS_TWITTER_POST_FETCHER_PATH, filename) + @app.route("/search", methods=('GET',)) def search_page(): logger.info("in search_page") @@ -209,7 +244,8 @@ def search_page(): result = None if USE_REDIS: with Bench("Trying Redis cache"): - key = "search_results:v1:" + json.dumps(request.args, sort_keys=True) + key = "search_results:v1:" + \ + json.dumps(request.args, sort_keys=True) logger.debug("key is:", pf(key)) result = Redis.get(key) if result: @@ -225,13 +261,14 @@ def search_page(): if USE_REDIS and valid_search: Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) if valid_search: return render_template("search_result_page.html", **result) else: return render_template("search_error.html") + @app.route("/search_table", methods=('GET',)) def search_page_table(): logger.info("in search_page table") @@ -242,7 +279,7 @@ def search_page_table(): logger.info(type(the_search.trait_list)) logger.info(the_search.trait_list) - + current_page = server_side.ServerSideTable( len(the_search.trait_list), the_search.trait_list, @@ -252,6 +289,7 @@ def search_page_table(): return flask.jsonify(current_page) + @app.route("/gsearch", methods=('GET',)) def gsearchact(): logger.info(request.url) @@ -262,17 +300,27 @@ def gsearchact(): elif type == "phenotype": return render_template("gsearch_pheno.html", **result) +@app.route("/gsearch_table", methods=('GET',)) +def gsearchtable(): + logger.info(request.url) + + gsearch_table_data = GSearch(request.args) + current_page = server_side.ServerSideTable( + gsearch_table_data.trait_count, + gsearch_table_data.trait_list, + gsearch_table_data.header_data_names, + request.args, + ).get_page() + + return flask.jsonify(current_page) + @app.route("/gsearch_updating", methods=('POST',)) def gsearch_updating(): logger.info("REQUEST ARGS:", request.values) logger.info(request.url) result = UpdateGSearch(request.args).__dict__ return result['results'] - # type = request.args['type'] - # if type == "gene": - # return render_template("gsearch_gene_updating.html", **result) - # elif type == "phenotype": - # return render_template("gsearch_pheno.html", **result) + @app.route("/docedit") def docedit(): @@ -292,41 +340,59 @@ def generated_file(filename): logger.info(request.url) return send_from_directory(GENERATED_IMAGE_DIR, filename) + @app.route("/help") def help(): logger.info(request.url) doc = Docs("help", request.args) return render_template("docs.html", **doc.__dict__) + @app.route("/wgcna_setup", methods=('POST',)) def wcgna_setup(): - logger.info("In wgcna, request.form is:", request.form) # We are going to get additional user input for the analysis + # We are going to get additional user input for the analysis + logger.info("In wgcna, request.form is:", request.form) logger.info(request.url) - return render_template("wgcna_setup.html", **request.form) # Display them using the template + # Display them using the template + return render_template("wgcna_setup.html", **request.form) + @app.route("/wgcna_results", methods=('POST',)) def wcgna_results(): logger.info("In wgcna, request.form is:", request.form) logger.info(request.url) - wgcna = wgcna_analysis.WGCNA() # Start R, load the package and pointers and create the analysis - wgcnaA = wgcna.run_analysis(request.form) # Start the analysis, a wgcnaA object should be a separate long running thread - result = wgcna.process_results(wgcnaA) # After the analysis is finished store the result - return render_template("wgcna_results.html", **result) # Display them using the template + # Start R, load the package and pointers and create the analysis + wgcna = wgcna_analysis.WGCNA() + # Start the analysis, a wgcnaA object should be a separate long running thread + wgcnaA = wgcna.run_analysis(request.form) + # After the analysis is finished store the result + result = wgcna.process_results(wgcnaA) + # Display them using the template + return render_template("wgcna_results.html", **result) + @app.route("/ctl_setup", methods=('POST',)) def ctl_setup(): - logger.info("In ctl, request.form is:", request.form) # We are going to get additional user input for the analysis + # We are going to get additional user input for the analysis + logger.info("In ctl, request.form is:", request.form) logger.info(request.url) - return render_template("ctl_setup.html", **request.form) # Display them using the template + # Display them using the template + return render_template("ctl_setup.html", **request.form) + @app.route("/ctl_results", methods=('POST',)) def ctl_results(): logger.info("In ctl, request.form is:", request.form) logger.info(request.url) - ctl = ctl_analysis.CTL() # Start R, load the package and pointers and create the analysis - ctlA = ctl.run_analysis(request.form) # Start the analysis, a ctlA object should be a separate long running thread - result = ctl.process_results(ctlA) # After the analysis is finished store the result - return render_template("ctl_results.html", **result) # Display them using the template + # Start R, load the package and pointers and create the analysis + ctl = ctl_analysis.CTL() + # Start the analysis, a ctlA object should be a separate long running thread + ctlA = ctl.run_analysis(request.form) + # After the analysis is finished store the result + result = ctl.process_results(ctlA) + # Display them using the template + return render_template("ctl_results.html", **result) + @app.route("/news") def news(): @@ -340,40 +406,159 @@ def intro(): return render_template("docs.html", **doc.__dict__) - @app.route("/tutorials") def tutorials(): - #doc = Docs("links", request.args) - #return render_template("docs.html", **doc.__dict__) return render_template("tutorials.html") + @app.route("/credits") def credits(): - #doc = Docs("links", request.args) - #return render_template("docs.html", **doc.__dict__) return render_template("credits.html") + @app.route("/update_text", methods=('POST',)) def update_page(): update_text(request.form) doc = Docs(request.form['entry_type'], request.form) return render_template("docs.html", **doc.__dict__) + @app.route("/submit_trait") def submit_trait_form(): logger.info(request.url) species_and_groups = get_species_groups() - return render_template("submit_trait.html", **{'species_and_groups' : species_and_groups, 'gn_server_url' : GN_SERVER_URL, 'version' : GN_VERSION}) + return render_template( + "submit_trait.html", + species_and_groups=species_and_groups, + gn_server_url=GN_SERVER_URL, + version=GN_VERSION) + + +@app.route("/trait/<name>/edit/<inbred_set_id>") +@admin_login_required +def edit_trait(name, inbred_set_id): + conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"), + user=current_app.config.get("DB_USER"), + passwd=current_app.config.get("DB_PASS"), + host=current_app.config.get("DB_HOST")) + publish_xref = fetchone( + conn=conn, + table="PublishXRef", + where=PublishXRef(id_=name, + inbred_set_id=inbred_set_id)) + phenotype_ = fetchone( + conn=conn, + table="Phenotype", + where=Phenotype(id_=publish_xref.phenotype_id)) + publication_ = fetchone( + conn=conn, + table="Publication", + where=Publication(id_=publish_xref.publication_id)) + json_data = fetchall( + conn, + "metadata_audit", + where=MetadataAudit(dataset_id=publish_xref.id_)) + + Edit = namedtuple("Edit", ["field", "old", "new", "diff"]) + Diff = namedtuple("Diff", ["author", "diff", "timestamp"]) + diff_data = [] + for data in json_data: + json_ = json.loads(data.json_data) + timestamp = json_.get("timestamp") + author = json_.get("author") + for key, value in json_.items(): + if isinstance(value, dict): + for field, data_ in value.items(): + diff_data.append( + Diff(author=author, + diff=Edit(field, + data_.get("old"), + data_.get("new"), + "\n".join(difflib.ndiff( + [data_.get("old")], + [data_.get("new")]))), + timestamp=timestamp)) + diff_data_ = None + if len(diff_data) > 0: + diff_data_ = groupby(diff_data, lambda x: x.timestamp) + return render_template( + "edit_trait.html", + diff=diff_data_, + publish_xref=publish_xref, + phenotype=phenotype_, + publication=publication_, + version=GN_VERSION, + ) + + +@app.route("/trait/update", methods=["POST"]) +def update_trait(): + conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"), + user=current_app.config.get("DB_USER"), + passwd=current_app.config.get("DB_PASS"), + host=current_app.config.get("DB_HOST")) + data_ = request.form.to_dict() + # Run updates: + phenotype_ = { + "pre_pub_description": data_.get("pre-pub-desc"), + "post_pub_description": data_.get("post-pub-desc"), + "original_description": data_.get("orig-desc"), + "units": data_.get("units"), + "pre_pub_abbreviation": data_.get("pre-pub-abbrev"), + "post_pub_abbreviation": data_.get("post-pub-abbrev"), + "lab_code": data_.get("labcode"), + "submitter": data_.get("submitter"), + "owner": data_.get("owner"), + "authorized_users": data_.get("authorized-users"), + } + updated_phenotypes = update( + conn, "Phenotype", + data=Phenotype(**phenotype_), + where=Phenotype(id_=data_.get("phenotype-id"))) + diff_data = {} + if updated_phenotypes: + diff_data.update({"Phenotype": diff_from_dict(old={ + k: data_.get(f"old_{k}") for k, v in phenotype_.items() + if v is not None}, new=phenotype_)}) + publication_ = { + "abstract": data_.get("abstract"), + "authors": data_.get("authors"), + "title": data_.get("title"), + "journal": data_.get("journal"), + "volume": data_.get("volume"), + "pages": data_.get("pages"), + "month": data_.get("month"), + "year": data_.get("year") + } + updated_publications = update( + conn, "Publication", + data=Publication(**publication_), + where=Publication(id_=data_.get("pubmed-id", + data_.get("old_id_")))) + if updated_publications: + diff_data.update({"Publication": diff_from_dict(old={ + k: data_.get(f"old_{k}") for k, v in publication_.items() + if v is not None}, new=publication_)}) + author = g.user_session.record.get(b'user_name') + if diff_data: + diff_data.update({"dataset_id": data_.get("dataset-name")}) + diff_data.update({"author": author.decode('utf-8')}) + diff_data.update({"timestamp": datetime.datetime.now().strftime( + "%Y-%m-%d %H:%M:%S")}) + insert(conn, + table="metadata_audit", + data=MetadataAudit(dataset_id=data_.get("dataset-name"), + editor=author.decode("utf-8"), + json_data=json.dumps(diff_data))) + return redirect("/trait/10007/edit/1") + @app.route("/create_temp_trait", methods=('POST',)) def create_temp_trait(): logger.info(request.url) - - #template_vars = submit_trait.SubmitTrait(request.form) - doc = Docs("links") return render_template("links.html", **doc.__dict__) - #return render_template("show_trait.html", **template_vars.__dict__) + @app.route('/export_trait_excel', methods=('POST',)) def export_trait_excel(): @@ -381,9 +566,11 @@ def export_trait_excel(): logger.info("In export_trait_excel") logger.info("request.form:", request.form) logger.info(request.url) - trait_name, sample_data = export_trait_data.export_sample_table(request.form) + trait_name, sample_data = export_trait_data.export_sample_table( + request.form) - logger.info("sample_data - type: %s -- size: %s" % (type(sample_data), len(sample_data))) + logger.info("sample_data - type: %s -- size: %s" % + (type(sample_data), len(sample_data))) buff = io.BytesIO() workbook = xlsxwriter.Workbook(buff, {'in_memory': True}) @@ -397,7 +584,8 @@ def export_trait_excel(): return Response(excel_data, mimetype='application/vnd.ms-excel', - headers={"Content-Disposition":"attachment;filename="+ trait_name + ".xlsx"}) + headers={"Content-Disposition": "attachment;filename=" + trait_name + ".xlsx"}) + @app.route('/export_trait_csv', methods=('POST',)) def export_trait_csv(): @@ -405,9 +593,11 @@ def export_trait_csv(): logger.info("In export_trait_csv") logger.info("request.form:", request.form) logger.info(request.url) - trait_name, sample_data = export_trait_data.export_sample_table(request.form) + trait_name, sample_data = export_trait_data.export_sample_table( + request.form) - logger.info("sample_data - type: %s -- size: %s" % (type(sample_data), len(sample_data))) + logger.info("sample_data - type: %s -- size: %s" % + (type(sample_data), len(sample_data))) buff = io.StringIO() writer = csv.writer(buff) @@ -418,7 +608,8 @@ def export_trait_csv(): return Response(csv_data, mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename="+ trait_name + ".csv"}) + headers={"Content-Disposition": "attachment;filename=" + trait_name + ".csv"}) + @app.route('/export_traits_csv', methods=('POST',)) def export_traits_csv(): @@ -432,7 +623,7 @@ def export_traits_csv(): now = datetime.datetime.now() time_str = now.strftime('%H:%M_%d%B%Y') filename = "export_{}".format(time_str) - memory_file = io.StringIO() + memory_file = io.BytesIO() with ZipFile(memory_file, mode='w', compression=ZIP_DEFLATED) as zf: for the_file in file_list: zf.writestr(the_file[0], the_file[1]) @@ -443,7 +634,8 @@ def export_traits_csv(): else: return Response(file_list[0][1], mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename=" + file_list[0][0]}) + headers={"Content-Disposition": "attachment;filename=" + file_list[0][0]}) + @app.route('/export_perm_data', methods=('POST',)) def export_perm_data(): @@ -454,7 +646,8 @@ def export_perm_data(): now = datetime.datetime.now() time_str = now.strftime('%H:%M_%d%B%Y') - file_name = "Permutation_" + perm_info['num_perm'] + "_" + perm_info['trait_name'] + "_" + time_str + file_name = "Permutation_" + \ + perm_info['num_perm'] + "_" + perm_info['trait_name'] + "_" + time_str the_rows = [ ["#Permutation Test"], @@ -468,10 +661,14 @@ def export_perm_data(): ["#N_genotypes: " + str(perm_info['n_genotypes'])], ["#Genotype_file: " + perm_info['genofile']], ["#Units_linkage: " + perm_info['units_linkage']], - ["#Permutation_stratified_by: " + ", ".join([ str(cofactor) for cofactor in perm_info['strat_cofactors']])], - ["#RESULTS_1: Suggestive LRS(p=0.63) = " + str(np.percentile(np.array(perm_info['perm_data']), 67))], - ["#RESULTS_2: Significant LRS(p=0.05) = " + str(np.percentile(np.array(perm_info['perm_data']), 95))], - ["#RESULTS_3: Highly Significant LRS(p=0.01) = " + str(np.percentile(np.array(perm_info['perm_data']), 99))], + ["#Permutation_stratified_by: " + + ", ".join([str(cofactor) for cofactor in perm_info['strat_cofactors']])], + ["#RESULTS_1: Suggestive LRS(p=0.63) = " + + str(np.percentile(np.array(perm_info['perm_data']), 67))], + ["#RESULTS_2: Significant LRS(p=0.05) = " + str( + np.percentile(np.array(perm_info['perm_data']), 95))], + ["#RESULTS_3: Highly Significant LRS(p=0.01) = " + str( + np.percentile(np.array(perm_info['perm_data']), 99))], ["#Comment: Results sorted from low to high peak linkage"] ] @@ -485,38 +682,29 @@ def export_perm_data(): return Response(csv_data, mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename=" + file_name + ".csv"}) + headers={"Content-Disposition": "attachment;filename=" + file_name + ".csv"}) + @app.route("/show_temp_trait", methods=('POST',)) def show_temp_trait_page(): logger.info(request.url) template_vars = show_trait.ShowTrait(request.form) - #logger.info("js_data before dump:", template_vars.js_data) template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") - # Sorting the keys messes up the ordered dictionary, so don't do that - #sort_keys=True) - - #logger.info("js_data after dump:", template_vars.js_data) - #logger.info("show_trait template_vars:", pf(template_vars.__dict__)) return render_template("show_trait.html", **template_vars.__dict__) + @app.route("/show_trait") def show_trait_page(): logger.info(request.url) template_vars = show_trait.ShowTrait(request.args) - #logger.info("js_data before dump:", template_vars.js_data) template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") - # Sorting the keys messes up the ordered dictionary, so don't do that - #sort_keys=True) - - #logger.info("js_data after dump:", template_vars.js_data) - #logger.info("show_trait template_vars:", pf(template_vars.__dict__)) return render_template("show_trait.html", **template_vars.__dict__) + @app.route("/heatmap", methods=('POST',)) def heatmap_page(): logger.info("In heatmap, request.form is:", pf(request.form)) @@ -528,7 +716,8 @@ def heatmap_page(): traits = [trait.strip() for trait in start_vars['trait_list'].split(',')] if traits[0] != "": version = "v5" - key = "heatmap:{}:".format(version) + json.dumps(start_vars, sort_keys=True) + key = "heatmap:{}:".format( + version) + json.dumps(start_vars, sort_keys=True) logger.info("key is:", pf(key)) with Bench("Loading cache"): result = Redis.get(key) @@ -549,21 +738,24 @@ def heatmap_page(): result = template_vars.__dict__ for item in list(template_vars.__dict__.keys()): - logger.info(" ---**--- {}: {}".format(type(template_vars.__dict__[item]), item)) + logger.info( + " ---**--- {}: {}".format(type(template_vars.__dict__[item]), item)) pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) logger.info("pickled result length:", len(pickled_result)) Redis.set(key, pickled_result) - Redis.expire(key, 60*60) + Redis.expire(key, 60 * 60) with Bench("Rendering template"): rendered_template = render_template("heatmap.html", **result) else: - rendered_template = render_template("empty_collection.html", **{'tool':'Heatmap'}) + rendered_template = render_template( + "empty_collection.html", **{'tool': 'Heatmap'}) return rendered_template + @app.route("/bnw_page", methods=('POST',)) def bnw_page(): logger.info("In run BNW, request.form is:", pf(request.form)) @@ -578,10 +770,12 @@ def bnw_page(): result = template_vars.__dict__ rendered_template = render_template("bnw_page.html", **result) else: - rendered_template = render_template("empty_collection.html", **{'tool':'BNW'}) + rendered_template = render_template( + "empty_collection.html", **{'tool': 'BNW'}) return rendered_template + @app.route("/webgestalt_page", methods=('POST',)) def webgestalt_page(): logger.info("In run WebGestalt, request.form is:", pf(request.form)) @@ -596,10 +790,12 @@ def webgestalt_page(): result = template_vars.__dict__ rendered_template = render_template("webgestalt_page.html", **result) else: - rendered_template = render_template("empty_collection.html", **{'tool':'WebGestalt'}) + rendered_template = render_template( + "empty_collection.html", **{'tool': 'WebGestalt'}) return rendered_template + @app.route("/geneweaver_page", methods=('POST',)) def geneweaver_page(): logger.info("In run WebGestalt, request.form is:", pf(request.form)) @@ -614,10 +810,12 @@ def geneweaver_page(): result = template_vars.__dict__ rendered_template = render_template("geneweaver_page.html", **result) else: - rendered_template = render_template("empty_collection.html", **{'tool':'GeneWeaver'}) + rendered_template = render_template( + "empty_collection.html", **{'tool': 'GeneWeaver'}) return rendered_template + @app.route("/comparison_bar_chart", methods=('POST',)) def comp_bar_chart_page(): logger.info("In comp bar chart, request.form is:", pf(request.form)) @@ -629,26 +827,30 @@ def comp_bar_chart_page(): if traits[0] != "": template_vars = comparison_bar_chart.ComparisonBarChart(request.form) template_vars.js_data = json.dumps(template_vars.js_data, - default=json_default_handler, - indent=" ") + default=json_default_handler, + indent=" ") result = template_vars.__dict__ - rendered_template = render_template("comparison_bar_chart.html", **result) + rendered_template = render_template( + "comparison_bar_chart.html", **result) else: - rendered_template = render_template("empty_collection.html", **{'tool':'Comparison Bar Chart'}) + rendered_template = render_template( + "empty_collection.html", **{'tool': 'Comparison Bar Chart'}) return rendered_template + @app.route("/mapping_results_container") def mapping_results_container_page(): return render_template("mapping_results_container.html") + @app.route("/loading", methods=('POST',)) def loading_page(): - logger.info(request.url) + # logger.info(request.url) initial_start_vars = request.form start_vars_container = {} - n_samples = 0 #ZS: So it can be displayed on loading page + n_samples = 0 # ZS: So it can be displayed on loading page if 'wanted_inputs' in initial_start_vars: wanted = initial_start_vars['wanted_inputs'].split(",") start_vars = {} @@ -661,16 +863,17 @@ def loading_page(): else: sample_vals_dict = json.loads(start_vars['sample_vals']) if 'group' in start_vars: - dataset = create_dataset(start_vars['dataset'], group_name = start_vars['group']) + dataset = create_dataset( + start_vars['dataset'], group_name=start_vars['group']) else: dataset = create_dataset(start_vars['dataset']) - genofile_samplelist = [] samples = start_vars['primary_samples'].split(",") if 'genofile' in start_vars: if start_vars['genofile'] != "": genofile_string = start_vars['genofile'] dataset.group.genofile = genofile_string.split(":")[0] - genofile_samples = run_mapping.get_genofile_samplelist(dataset) + genofile_samples = run_mapping.get_genofile_samplelist( + dataset) if len(genofile_samples) > 1: samples = genofile_samples @@ -690,6 +893,7 @@ def loading_page(): return rendered_template + @app.route("/run_mapping", methods=('POST',)) def mapping_results_page(): initial_start_vars = request.form @@ -733,7 +937,6 @@ def mapping_results_page(): 'color_scheme', 'manhattan_single_color', 'control_marker', - 'control_marker_db', 'do_control', 'genofile', 'genofile_string', @@ -760,9 +963,10 @@ def mapping_results_page(): start_vars[key] = value version = "v3" - key = "mapping_results:{}:".format(version) + json.dumps(start_vars, sort_keys=True) + key = "mapping_results:{}:".format( + version) + json.dumps(start_vars, sort_keys=True) with Bench("Loading cache"): - result = None # Just for testing + result = None # Just for testing #result = Redis.get(key) #logger.info("************************ Starting result *****************") @@ -782,12 +986,12 @@ def mapping_results_page(): rendered_template = render_template("mapping_error.html") return rendered_template except: - rendered_template = render_template("mapping_error.html") - return rendered_template + rendered_template = render_template("mapping_error.html") + return rendered_template template_vars.js_data = json.dumps(template_vars.js_data, - default=json_default_handler, - indent=" ") + default=json_default_handler, + indent=" ") result = template_vars.__dict__ @@ -802,18 +1006,20 @@ def mapping_results_page(): imgB64 = base64.b64encode(imgdata) bytesarray = array.array('B', imgB64) result['pair_scan_array'] = bytesarray - rendered_template = render_template("pair_scan_results.html", **result) + rendered_template = render_template( + "pair_scan_results.html", **result) else: - gn1_template_vars = display_mapping_results.DisplayMappingResults(result).__dict__ + gn1_template_vars = display_mapping_results.DisplayMappingResults( + result).__dict__ with Bench("Rendering template"): - #if (gn1_template_vars['mapping_method'] == "gemma") or (gn1_template_vars['mapping_method'] == "plink"): - #gn1_template_vars.pop('qtlresults', None) - rendered_template = render_template("mapping_results.html", **gn1_template_vars) + rendered_template = render_template( + "mapping_results.html", **gn1_template_vars) return rendered_template -@app.route("/export_mapping_results", methods = ('POST',)) + +@app.route("/export_mapping_results", methods=('POST',)) def export_mapping_results(): logger.info("request.form:", request.form) logger.info(request.url) @@ -821,32 +1027,35 @@ def export_mapping_results(): results_csv = open(file_path, "r").read() response = Response(results_csv, mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename=mapping_results.csv"}) + headers={"Content-Disposition": "attachment;filename=mapping_results.csv"}) return response -@app.route("/export_corr_matrix", methods = ('POST',)) + +@app.route("/export_corr_matrix", methods=('POST',)) def export_corr_matrix(): file_path = request.form.get("export_filepath") file_name = request.form.get("export_filename") results_csv = open(file_path, "r").read() response = Response(results_csv, mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename=" + file_name + ".csv"}) + headers={"Content-Disposition": "attachment;filename=" + file_name + ".csv"}) return response -@app.route("/export", methods = ('POST',)) + +@app.route("/export", methods=('POST',)) def export(): logger.info("request.form:", request.form) logger.info(request.url) svg_xml = request.form.get("data", "Invalid data") filename = request.form.get("filename", "manhattan_plot_snp") response = Response(svg_xml, mimetype="image/svg+xml") - response.headers["Content-Disposition"] = "attachment; filename=%s"%filename + response.headers["Content-Disposition"] = "attachment; filename=%s" % filename return response -@app.route("/export_pdf", methods = ('POST',)) + +@app.route("/export_pdf", methods=('POST',)) def export_pdf(): import cairosvg logger.info("request.form:", request.form) @@ -854,12 +1063,12 @@ def export_pdf(): svg_xml = request.form.get("data", "Invalid data") logger.info("svg_xml:", svg_xml) filename = request.form.get("filename", "interval_map_pdf") - filepath = GENERATED_IMAGE_DIR+filename pdf_file = cairosvg.svg2pdf(bytestring=svg_xml) response = Response(pdf_file, mimetype="application/pdf") - response.headers["Content-Disposition"] = "attachment; filename=%s"%filename + response.headers["Content-Disposition"] = "attachment; filename=%s" % filename return response + @app.route("/network_graph", methods=('POST',)) def network_graph_page(): logger.info("In network_graph, request.form is:", pf(request.form)) @@ -874,7 +1083,8 @@ def network_graph_page(): return render_template("network_graph.html", **template_vars.__dict__) else: - return render_template("empty_collection.html", **{'tool':'Network Graph'}) + return render_template("empty_collection.html", **{'tool': 'Network Graph'}) + @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): @@ -883,6 +1093,17 @@ def corr_compute_page(): template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) + # to test/disable the new correlation api uncomment these lines + + # correlation_results = compute_correlation(request.form) + # return render_template("test_correlation_page.html", correlation_results=correlation_results) + + +@app.route("/test_corr_compute", methods=["POST"]) +def test_corr_compute_page(): + correlation_data = compute_correlation(request.form) + return render_template("test_correlation_page.html", **correlation_data) + @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): logger.info("In corr_matrix, request.form is:", pf(request.form)) @@ -898,7 +1119,8 @@ def corr_matrix_page(): return render_template("correlation_matrix.html", **template_vars.__dict__) else: - return render_template("empty_collection.html", **{'tool':'Correlation Matrix'}) + return render_template("empty_collection.html", **{'tool': 'Correlation Matrix'}) + @app.route("/corr_scatter_plot") def corr_scatter_plot_page(): @@ -909,6 +1131,7 @@ def corr_scatter_plot_page(): indent=" ") return render_template("corr_scatterplot.html", **template_vars.__dict__) + @app.route("/snp_browser", methods=('GET',)) def snp_browser_page(): logger.info(request.url) @@ -916,12 +1139,14 @@ def snp_browser_page(): return render_template("snp_browser.html", **template_vars.__dict__) + @app.route("/db_info", methods=('GET',)) def db_info_page(): template_vars = InfoPage(request.args) return render_template("info_page.html", **template_vars.__dict__) + @app.route("/snp_browser_table", methods=('GET',)) def snp_browser_table(): logger.info(request.url) @@ -935,31 +1160,36 @@ def snp_browser_table(): return flask.jsonify(current_page) + @app.route("/tutorial/WebQTLTour", methods=('GET',)) def tutorial_page(): - #ZS: Currently just links to GN1 + # ZS: Currently just links to GN1 logger.info(request.url) return redirect("http://gn1.genenetwork.org/tutorial/WebQTLTour/") + @app.route("/tutorial/security", methods=('GET',)) def security_tutorial_page(): - #ZS: Currently just links to GN1 + # ZS: Currently just links to GN1 logger.info(request.url) return render_template("admin/security_help.html") + @app.route("/submit_bnw", methods=('POST',)) def submit_bnw(): logger.info(request.url) - template_vars = get_bnw_input(request.form) - return render_template("empty_collection.html", **{'tool':'Correlation Matrix'}) + return render_template("empty_collection.html", **{'tool': 'Correlation Matrix'}) # Take this out or secure it before putting into production + + @app.route("/get_temp_data") def get_temp_data(): logger.info(request.url) temp_uuid = request.args['key'] return flask.jsonify(temp_data.TempData(temp_uuid).get_all()) + @app.route("/browser_input", methods=('GET',)) def browser_inputs(): """ Returns JSON from tmp directory for the purescript genome browser""" @@ -973,8 +1203,9 @@ def browser_inputs(): ########################################################################## + def json_default_handler(obj): - '''Based on http://stackoverflow.com/a/2680060/1175849''' + """Based on http://stackoverflow.com/a/2680060/1175849""" # Handle datestamps if hasattr(obj, 'isoformat'): return obj.isoformat() @@ -984,9 +1215,6 @@ def json_default_handler(obj): # Handle custom objects if hasattr(obj, '__dict__'): return obj.__dict__ - #elif type(obj) == "Dataset": - # logger.info("Not going to serialize Dataset") - # return None else: raise TypeError('Object of type %s with value of %s is not JSON serializable' % ( type(obj), repr(obj))) diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 6bf75216..f96892a0 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -42,7 +42,7 @@ r_png = ro.r["png"] # Map the png function for plotting r_dev_off = ro.r["dev.off"] # Map the dev.off function -class WGCNA(object): +class WGCNA: def __init__(self): # To log output from stdout/stderr to a file add `r_sink(log)` print("Initialization of WGCNA") @@ -70,7 +70,7 @@ class WGCNA(object): self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] print(("Retrieved phenotype data from database", - requestform['trait_list'])) + requestform['trait_list'])) helper_functions.get_trait_db_obs(self, self.trait_db_list) # self.input contains the phenotype values we need to send to R diff --git a/wqflask/wsgi.py b/wqflask/wsgi.py index be9c7b37..755da333 100644 --- a/wqflask/wsgi.py +++ b/wqflask/wsgi.py @@ -1,4 +1,4 @@ -from run_gunicorn import app as application # expect application as a name +from run_gunicorn import app as application # expect application as a name if __name__ == "__main__": application.run() |