From 4e6722beb4302bf22719ad783443767bb1ba7f6a Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Wed, 24 Apr 2013 19:35:55 +0000
Subject: Fixed issue with the part of quick_search_table.py that loads
phenotype traits
---
wqflask/maintenance/quick_search_table.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 046a05c4..9ea8d2a3 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -136,7 +136,7 @@ class PublishXRef(Base):
"Phenotype.Original_description as description, "
"PublishXRef.LRS as lrs, "
"PublishXRef.PublicationId as publication_id, "
- "Publication.PubMed_ID"
+ "Publication.PubMed_ID, "
"Publication.Year as year, "
"Publication.Authors as authors "
"FROM PublishXRef, "
--
cgit v1.2.3
From 23dbf30e4c96faae3fda19f539be2a8b260dc957 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Wed, 24 Apr 2013 19:56:39 +0000
Subject: Checking in quick_search_table.py before using pylint
---
wqflask/maintenance/quick_search_table.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 9ea8d2a3..aa40badf 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -136,7 +136,7 @@ class PublishXRef(Base):
"Phenotype.Original_description as description, "
"PublishXRef.LRS as lrs, "
"PublishXRef.PublicationId as publication_id, "
- "Publication.PubMed_ID, "
+ "Publication.PubMed_ID as pubmed_id, "
"Publication.Year as year, "
"Publication.Authors as authors "
"FROM PublishXRef, "
@@ -450,11 +450,12 @@ QuickSearch = sa.Table("QuickSearch", Metadata,
sa.Column('the_key', sa.String(30),
primary_key=True, nullable=False, autoincrement=False), # key in database table
sa.Column('terms', sa.Text), # terms to compare search string with
- sa.Column('result_fields', sa.Text) # json
+ sa.Column('result_fields', sa.Text), # json
+ mysql_engine = 'MyISAM',
)
-QuickSearch.drop(Engine, checkfirst=True)
-Metadata.create_all(Engine)
+#QuickSearch.drop(Engine, checkfirst=True)
+#Metadata.create_all(Engine)
def row2dict(row):
@@ -476,8 +477,8 @@ def page_query(q):
def main():
- ProbeSetXRef.run()
- GenoXRef.run()
+ #ProbeSetXRef.run()
+ #GenoXRef.run()
PublishXRef.run()
if __name__ == "__main__":
--
cgit v1.2.3
From 7223bae7ee981618fd3c2d8cb602b17da2661151 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Wed, 24 Apr 2013 20:01:00 +0000
Subject: Added original pylintrc
---
wqflask/other_config/pylintrc | 264 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 264 insertions(+)
create mode 100644 wqflask/other_config/pylintrc
diff --git a/wqflask/other_config/pylintrc b/wqflask/other_config/pylintrc
new file mode 100644
index 00000000..36d2bddf
--- /dev/null
+++ b/wqflask/other_config/pylintrc
@@ -0,0 +1,264 @@
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+profile=no
+
+# Add files or directories to the blacklist. They should be base names, not
+# paths.
+ignore=CVS
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by commas (,) or put this option
+# multiple times. See also the "--disable" option for examples.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use"--disable=all --enable=classes
+# --disable=W"
+#disable=
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Include message's id in output
+include-ids=no
+
+# Include symbolic ids of messages in output
+symbols=no
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning and statement, which
+# respectively contain the number of error / warning messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=map,filter,apply,input
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=SQLObject
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed. Python regular
+# expressions are accepted.
+generated-members=REQUEST,acl_users,aq_parent
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branch for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=Exception
--
cgit v1.2.3
From 00f20d2d650c44794a8d21aad45d748e9aa0365b Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Wed, 24 Apr 2013 23:30:18 +0000
Subject: Began writing code that writes some objects used in LMM to redis and
writes/reads each item in the plink_input
---
misc/notes.txt | 8 ++
wqflask/maintenance/quick_search_table.py | 124 ++++++++++++++----------
wqflask/other_config/pylintrc | 6 +-
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 38 ++++++--
wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 24 +++++
5 files changed, 136 insertions(+), 64 deletions(-)
create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
diff --git a/misc/notes.txt b/misc/notes.txt
index 6bdcccf3..5d4bcaeb 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -106,6 +106,14 @@ sudo /etc/init.d/redis_6379 start
===========================================
+Run pylint:
+pylint file_name
+
+For options:
+pylint --help
+
+===========================================
+
Start screen session:
If "no option -R":
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index aa40badf..4f2cd8a9 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -1,17 +1,17 @@
-from __future__ import print_function, division, absolute_import
+"""Creates a table used for the quick search feature.
+
+One column contains the terms to match the user's search against. Another contains the result
+fields in json format
-"""
Results will be returned for each of several trait types: mRNA assays, phenotypes, genotypes, and
(maybe later) genes
-For each trait type, the results for each species should be given; for example, have a "Mouse" tab
-with the mouse traits in a table inside it
-
-This table will then list each trait, its dataset, and several columns determined by its trait type
-(phenotype, genotype, etc)
+For each trait type, the results for each species should be given. This table will then list
+each trait, its dataset, and several columns determined by its trait type (phenotype, genotype, etc)
"""
+from __future__ import print_function, division, absolute_import
import sys
sys.path.append("../../..")
@@ -19,19 +19,17 @@ sys.path.append("../../..")
import simplejson as json
import sqlalchemy as sa
-from sqlalchemy.dialects import mysql
-from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
-from sqlalchemy.orm.exc import NoResultFound
+from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
-from pprint import pformat as pf
+#from pprint import pformat as pf
import zach_settings as settings
Engine = sa.create_engine(settings.SQLALCHEMY_DATABASE_URI,
#encoding='utf-8',
#client_encoding='utf-8',
- #echo="debug",
+ #echo="debug",w
)
Session = scoped_session(sessionmaker(bind=Engine)) #, extension=VersionedListener()))
@@ -41,6 +39,14 @@ Metadata = sa.MetaData()
Metadata.bind = Engine
class PublishXRef(Base):
+ """Class that corresponds with the PublishXRef table in the database.
+
+ The PublishXRef table links phenotype traits and their publications.
+
+ This class is used to add phenotype traits to the quick search table.
+
+ """
+
__tablename__ = 'PublishXRef'
Id = sa.Column(sa.Integer, primary_key=True)
@@ -56,15 +62,16 @@ class PublishXRef(Base):
@classmethod
def run(cls):
+ """Connects to database and inserts phenotype trait info into the Quicksearch table."""
conn = Engine.connect()
counter = 0
- for ps in page_query(Session.query(cls)): #all()
+ for pub_row in page_query(Session.query(cls)): #all()
values = {}
values['table_name'] = cls.__tablename__
- values['the_key'] = json.dumps([ps.Id, ps.InbredSetId])
- values['terms'] = cls.get_unique_terms(ps.Id, ps.InbredSetId)
+ values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId])
+ values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId)
print("terms is:", values['terms'])
- values['result_fields'] = cls.get_result_fields(ps.Id, ps.InbredSetId)
+ values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId)
ins = QuickSearch.insert().values(**values)
conn.execute(ins)
counter += 1
@@ -72,6 +79,7 @@ class PublishXRef(Base):
@staticmethod
def get_unique_terms(publishxref_id, inbredset_id):
+ """Finds unique terms for each item in the PublishXRef table to match a query against"""
results = Session.query(
"pre_publication_description",
"post_publication_description",
@@ -119,6 +127,7 @@ class PublishXRef(Base):
@staticmethod
def get_result_fields(publishxref_id, inbredset_id):
+ """Gets the result fields (columns) that appear on the result page as a json string"""
results = Session.query(
"phenotype_id",
"species",
@@ -150,17 +159,10 @@ class PublishXRef(Base):
"PublishXRef.PublicationId = Publication.Id and "
"InbredSet.Id = :inbredset_id and "
"Species.Id = InbredSet.SpeciesId ").params(publishxref_id=publishxref_id,
- inbredset_id=inbredset_id).all()
- #"InbredSet.SpeciesId = Species.Id and "
- #"Geno.SpeciesId = Species.Id and "
- #"Geno.Name = PublishXRef.Locus ").params(publishxref_id=publishxref_id,
- # inbredset_id=inbredset_id).all()
- #for result in results:
- # print("****", result)
+ inbredset_id=inbredset_id).all()
assert len(set(result for result in results)) == 1, "Different results or no results"
- #print("results are:", results)
result = results[0]
result = row2dict(result)
try:
@@ -176,7 +178,16 @@ class PublishXRef(Base):
return json_results
+
class GenoXRef(Base):
+ """Class that corresponds with the GenoXRef table in the database.
+
+ The GenoXRef table links genotype traits and their data.
+
+ This class is used to add genotype traits to the quick search table.
+
+ """
+
__tablename__ = 'GenoXRef'
GenoFreezeId = sa.Column(sa.Integer, primary_key=True)
@@ -187,6 +198,7 @@ class GenoXRef(Base):
@classmethod
def run(cls):
+ """Connects to database and inserts genotype trait info into the Quicksearch table."""
conn = Engine.connect()
counter = 0
for item in page_query(Session.query(cls)): #all()
@@ -201,9 +213,10 @@ class GenoXRef(Base):
conn.execute(ins)
counter += 1
print("Done:", counter)
-
+
@staticmethod
def get_unique_terms(geno_id):
+ """Finds unique terms for each item in the GenoXRef table to match a query against"""
print("geno_id: ", geno_id)
results = Session.query(
"name",
@@ -214,8 +227,6 @@ class GenoXRef(Base):
"FROM Geno "
"WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all()
- #print("results: ", pf(results))
-
unique = set()
if len(results):
for item in results[0]:
@@ -234,12 +245,12 @@ class GenoXRef(Base):
continue
unique.add(token)
- #print("\nUnique terms are: {}\n".format(unique))
return " ".join(unique)
@staticmethod
def get_result_fields(geno_id, dataset_id):
+ """Gets the result fields (columns) that appear on the result page as a json string"""
results = Session.query(
"name",
"marker_name",
@@ -271,11 +282,8 @@ class GenoXRef(Base):
"InbredSet.Id = GenoFreeze.InbredSetId and "
"InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id,
dataset_id=dataset_id).all()
- #for result in results:
- # print(result)
assert len(set(result for result in results)) == 1, "Different results"
- #print("results are:", results)
result = results[0]
result = row2dict(result)
try:
@@ -292,6 +300,14 @@ class GenoXRef(Base):
return json_results
class ProbeSetXRef(Base):
+ """Class that corresponds with the ProbeSetXRef table in the database.
+
+ The ProbeSetXRef table links mRNA expression traits and their sample data.
+
+ This class is used to add mRNA expression traits to the quick search table.
+
+ """
+
__tablename__ = 'ProbeSetXRef'
ProbeSetFreezeId = sa.Column(sa.Integer, primary_key=True)
@@ -310,16 +326,17 @@ class ProbeSetXRef(Base):
@classmethod
def run(cls):
+ """Connects to db and inserts mRNA expression trait info into the Quicksearch table."""
conn = Engine.connect()
counter = 0
- for ps in page_query(Session.query(cls)): #all()
+ for ps_row in page_query(Session.query(cls)): #all()
values = {}
values['table_name'] = cls.__tablename__
- values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
- values['terms'] = cls.get_unique_terms(ps.ProbeSetId)
+ values['the_key'] = json.dumps([ps_row.ProbeSetId, ps_row.ProbeSetFreezeId])
+ values['terms'] = cls.get_unique_terms(ps_row.ProbeSetId)
print("terms is:", values['terms'])
- #values['species'] = get_species("ProbeSet", ps.Id)
- values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId)
+ values['result_fields'] = cls.get_result_fields(ps_row.ProbeSetId,
+ ps_row.ProbeSetFreezeId)
if values['result_fields'] == None:
continue
ins = QuickSearch.insert().values(**values)
@@ -329,6 +346,7 @@ class ProbeSetXRef(Base):
@staticmethod
def get_unique_terms(probeset_id):
+ """Finds unique terms for each item in the ProbeSetXRef table to match a query against"""
results = Session.query(
"name",
"symbol",
@@ -345,7 +363,6 @@ class ProbeSetXRef(Base):
unique = set()
if len(results):
for item in results[0]:
- #print("locals:", locals())
if not item:
continue
for token in item.split():
@@ -366,12 +383,12 @@ class ProbeSetXRef(Base):
continue
unique.add(token)
- #print("\nUnique terms are: {}\n".format(unique))
return " ".join(unique)
@staticmethod
def get_result_fields(probeset_id, dataset_id):
+ """Gets the result fields (columns) that appear on the result page as a json string"""
print("probeset_id: ", probeset_id)
print("dataset_id: ", dataset_id)
results = Session.query(
@@ -420,14 +437,10 @@ class ProbeSetXRef(Base):
"ProbeFreeze.InbredSetId = InbredSet.Id and "
"InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id,
dataset_id=dataset_id).all()
- #for result in results:
- # print("-", result)
if len(set(result for result in results)) != 1:
return None
- #assert len(set(result for result in results)) == 1, "Different results"
-
- #print("results are:", results)
+
result = results[0]
result = row2dict(result)
try:
@@ -443,10 +456,10 @@ class ProbeSetXRef(Base):
return json_results
-
QuickSearch = sa.Table("QuickSearch", Metadata,
+ # table_name is the table that item is inserted from
sa.Column('table_name', sa.String(15),
- primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from
+ primary_key=True, nullable=False, autoincrement=False),
sa.Column('the_key', sa.String(30),
primary_key=True, nullable=False, autoincrement=False), # key in database table
sa.Column('terms', sa.Text), # terms to compare search string with
@@ -459,24 +472,29 @@ QuickSearch = sa.Table("QuickSearch", Metadata,
def row2dict(row):
- """http://stackoverflow.com/a/2848519/1175849"""
+ """From http://stackoverflow.com/a/2848519/1175849"""
return dict(zip(row.keys(), row))
-def page_query(q):
- """http://stackoverflow.com/a/1217947/1175849"""
+def page_query(query):
+ """From http://stackoverflow.com/a/1217947/1175849"""
offset = 0
while True:
- r = False
- for elem in q.limit(1000).offset(offset):
- r = True
- yield elem
+ rrr = False
+ for elem in query.limit(1000).offset(offset):
+ rrr = True
+ yield elem
offset += 1000
- if not r:
+ if not rrr:
break
def main():
+ """Populate the QuickSearch table that is used with the quick search features.
+
+ Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
+
+ """
#ProbeSetXRef.run()
#GenoXRef.run()
PublishXRef.run()
diff --git a/wqflask/other_config/pylintrc b/wqflask/other_config/pylintrc
index 36d2bddf..b23af7a4 100644
--- a/wqflask/other_config/pylintrc
+++ b/wqflask/other_config/pylintrc
@@ -38,7 +38,7 @@ load-plugins=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use"--disable=all --enable=classes
# --disable=W"
-#disable=
+disable=no-init, star-args, no-member, import-error
[REPORTS]
@@ -115,7 +115,7 @@ bad-functions=map,filter,apply,input
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression which should only match correct module level names
-const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+const-rgx=(([A-Z_][A-Za-z0-9_]*)|(__.*__))$
# Regular expression which should only match correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
@@ -153,7 +153,7 @@ no-docstring-rgx=__.*__
[FORMAT]
# Maximum number of characters on a single line.
-max-line-length=80
+max-line-length=100
# Maximum number of lines in a module
max-module-lines=1000
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index a3ba8fdb..10221a2e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -28,6 +28,7 @@ from scipy import stats
import pdb
import gzip
+import datetime
import cPickle as pickle
import simplejson as json
@@ -38,6 +39,9 @@ from utility import temp_data
from wqflask.my_pylmm.pyLMM import chunks
+import redis
+Redis = redis.Redis()
+
#np.seterr('raise')
def run_human(pheno_vector,
@@ -65,6 +69,7 @@ def run_human(pheno_vector,
covariate_matrix)
lmm_ob.fit()
+
# Buffers for pvalues and t-stats
p_values = []
t_stats = []
@@ -86,19 +91,36 @@ def run_human(pheno_vector,
with Bench("Create list of inputs"):
inputs = list(plink_input)
-
+
with Bench("Divide into chunks"):
results = chunks.divide_into_chunks(inputs, 64)
result_store = []
- identifier = uuid.uuid4()
- for part, result in enumerate(results):
- # todo: Don't use TempData here. Instead revert old one and store this stuff as a list
- data_store = temp_data.TempData(identifier, "plink", part)
-
- data_store.store("data", pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
- result_store.append(data_store)
+ identifier = str(uuid.uuid4())
+
+ lmm_vars = pickle.dumps(dict(
+ pheno_vector = pheno_vector,
+ covariate_matrix = covariate_matrix,
+ kinship_matrix = kinship_matrix
+ ))
+ Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars))
+
+ key = "plink_inputs"
+ timestamp = datetime.datetime.utcnow().isoformat()
+
+ for part, result in enumerate(results):
+ #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
+ holder = pickle.dumps(dict(
+ identifier = identifier,
+ part = part,
+ timestamp = timestamp,
+ result = result
+ ), pickle.HIGHEST_PROTOCOL)
+ print("Adding:", part)
+ Redis.rpush(key, holder)
+
+ print("***** Added to {} queue *****".format(key))
for snp, this_id in plink_input:
with Bench("part before association"):
if count > 2000:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
new file mode 100644
index 00000000..5ce7adbc
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -0,0 +1,24 @@
+from __future__ import absolute_import, print_function, division
+
+import cPickle as pickle
+import redis
+Redis = redis.Redis()
+
+from wqflask.my_pylmm.pyLMM import lmm
+
+lmm_vars_pickled = Redis.get("lmm_vars")
+
+
+plink_pickled = Redis.lpop("plink_inputs")
+
+plink_data = pickle.loads(plink_pickled)
+
+
+ps, ts = lmm.human_association(snp,
+ n,
+ keep,
+ lmm_ob,
+ pheno_vector,
+ covariate_matrix,
+ kinship_matrix,
+ refit)
\ No newline at end of file
--
cgit v1.2.3
From 275f9210a7af6a2de32e8e102de816276b5ef237 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Fri, 26 Apr 2013 16:21:38 +0000
Subject: Added file that checks how much time is added to a loop's runtime by
print statements
Continued work on file that processes each individual chunk of the snp iterator/plink input
---
wqflask/maintenance/print_benchmark.py | 43 +++++++++++++++++++++++++
wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 6 +++-
2 files changed, 48 insertions(+), 1 deletion(-)
create mode 100644 wqflask/maintenance/print_benchmark.py
diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py
new file mode 100644
index 00000000..540e0904
--- /dev/null
+++ b/wqflask/maintenance/print_benchmark.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+
+from __future__ import absolute_import, print_function, division
+
+import time
+
+from pprint import pformat as pf
+
+
+class TheCounter(object):
+ Counters = {}
+
+ def __init__(self):
+ start_time = time.time()
+ for counter in range(170000):
+ self.print_it(counter)
+ self.time_took = time.time() - start_time
+ TheCounter.Counters[self.__class__.__name__] = self.time_took
+
+class PrintAll(TheCounter):
+ def print_it(self, counter):
+ print(counter)
+
+class PrintSome(TheCounter):
+ def print_it(self, counter):
+ if counter % 1000 == 0:
+ print(counter)
+
+class PrintNone(TheCounter):
+ def print_it(self, counter):
+ pass
+
+
+def new_main():
+ print("Running new_main")
+ tests = [PrintAll, PrintSome, PrintNone]
+ for test in tests:
+ test()
+
+ print(pf(TheCounter.Counters))
+
+if __name__ == '__main__':
+ new_main()
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
index 5ce7adbc..8f7ad243 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -13,6 +13,9 @@ plink_pickled = Redis.lpop("plink_inputs")
plink_data = pickle.loads(plink_pickled)
+identifier = plink_data['identifier']
+print("identifier: ", identifier)
+
ps, ts = lmm.human_association(snp,
n,
@@ -21,4 +24,5 @@ ps, ts = lmm.human_association(snp,
pheno_vector,
covariate_matrix,
kinship_matrix,
- refit)
\ No newline at end of file
+ refit)
+
--
cgit v1.2.3
From d4138b76eae3aa54ba18d44d753cb440aac7a0f8 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Tue, 30 Apr 2013 19:16:51 +0000
Subject: Did a little with correlation_plot.py
---
wqflask/wqflask/correlation/correlation_plot.py | 7 ++++++-
wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 2 +-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_plot.py b/wqflask/wqflask/correlation/correlation_plot.py
index 4b043fc3..c2b64d70 100644
--- a/wqflask/wqflask/correlation/correlation_plot.py
+++ b/wqflask/wqflask/correlation/correlation_plot.py
@@ -24,6 +24,10 @@ class CorrelationPlot(object):
self.samples_1 = self.get_samples(self.dataset1, sample_names_1, self.trait1)
self.samples_2 = self.get_samples(self.dataset2, sample_names_2, self.trait2)
+ coords = {}
+ for sample in self.samples_1:
+ coords[sample.name] = (sample.val)
+
def get_sample_names(self, dataset):
if dataset.group.parlist:
@@ -45,4 +49,5 @@ class CorrelationPlot(object):
sample_group_type='primary',
header="%s Only" % (dataset.group.name))
- return samples
\ No newline at end of file
+ return samples
+
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
index 8f7ad243..1274fe50 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -17,6 +17,7 @@ identifier = plink_data['identifier']
print("identifier: ", identifier)
+
ps, ts = lmm.human_association(snp,
n,
keep,
@@ -25,4 +26,3 @@ ps, ts = lmm.human_association(snp,
covariate_matrix,
kinship_matrix,
refit)
-
--
cgit v1.2.3
From dd3f7bb79d39252a987826a9825d00da782ba58a Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 2 May 2013 22:58:18 +0000
Subject: Got quick search code running (but not displaying properly)
Code that processes subset of snps works, in process of
putting into a class
---
wqflask/maintenance/quick_search_table.py | 4 +-
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79 +++++++-------
wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 131 +++++++++++++++++++++---
wqflask/wqflask/search_results.py | 14 +--
wqflask/wqflask/templates/quick_search.html | 6 +-
5 files changed, 173 insertions(+), 61 deletions(-)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 4f2cd8a9..b07e7656 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -495,9 +495,9 @@ def main():
Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
"""
- #ProbeSetXRef.run()
+ ProbeSetXRef.run()
#GenoXRef.run()
- PublishXRef.run()
+ #PublishXRef.run()
if __name__ == "__main__":
main()
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 10221a2e..fc021a0b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -28,6 +28,7 @@ from scipy import stats
import pdb
import gzip
+import zlib
import datetime
import cPickle as pickle
import simplejson as json
@@ -55,15 +56,26 @@ def run_human(pheno_vector,
keep = True - v
keep = keep.reshape((len(keep),))
+ identifier = str(uuid.uuid4())
+
+ lmm_vars = pickle.dumps(dict(
+ pheno_vector = pheno_vector,
+ covariate_matrix = covariate_matrix,
+ kinship_matrix = kinship_matrix
+ ))
+ Redis.hset(identifier, "lmm_vars", lmm_vars)
+ Redis.expire(identifier, 60*60)
+
if v.sum():
pheno_vector = pheno_vector[keep]
#print("pheno_vector shape is now: ", pf(pheno_vector.shape))
covariate_matrix = covariate_matrix[keep,:]
- #print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
+ print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
#print("len(keep) is: ", pf(keep.shape))
kinship_matrix = kinship_matrix[keep,:][:,keep]
n = kinship_matrix.shape[0]
+ print("n is:", n)
lmm_ob = LMM(pheno_vector,
kinship_matrix,
covariate_matrix)
@@ -96,19 +108,15 @@ def run_human(pheno_vector,
results = chunks.divide_into_chunks(inputs, 64)
result_store = []
- identifier = str(uuid.uuid4())
-
- lmm_vars = pickle.dumps(dict(
- pheno_vector = pheno_vector,
- covariate_matrix = covariate_matrix,
- kinship_matrix = kinship_matrix
- ))
- Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars))
-
key = "plink_inputs"
+
+ # Todo: Delete below line when done testing
+ Redis.delete(key)
+
timestamp = datetime.datetime.utcnow().isoformat()
+ print("Starting adding loop")
for part, result in enumerate(results):
#data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
holder = pickle.dumps(dict(
@@ -117,33 +125,34 @@ def run_human(pheno_vector,
timestamp = timestamp,
result = result
), pickle.HIGHEST_PROTOCOL)
+
print("Adding:", part)
- Redis.rpush(key, holder)
-
+ Redis.rpush(key, zlib.compress(holder))
+ print("End adding loop")
print("***** Added to {} queue *****".format(key))
for snp, this_id in plink_input:
- with Bench("part before association"):
- if count > 2000:
- break
- count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- loading_progress.store("percent_complete", percent_complete)
-
- with Bench("actual association"):
- ps, ts = human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit)
-
- with Bench("after association"):
- p_values.append(ps)
- t_stats.append(ts)
+ #with Bench("part before association"):
+ if count > 2000:
+ break
+ count += 1
+
+ percent_complete = (float(count) / total_snps) * 100
+ #print("percent_complete: ", percent_complete)
+ loading_progress.store("percent_complete", percent_complete)
+
+ #with Bench("actual association"):
+ ps, ts = human_association(snp,
+ n,
+ keep,
+ lmm_ob,
+ pheno_vector,
+ covariate_matrix,
+ kinship_matrix,
+ refit)
+
+ #with Bench("after association"):
+ p_values.append(ps)
+ t_stats.append(ts)
return p_values, t_stats
@@ -326,7 +335,7 @@ def GWAS(pheno_vector,
covariate_matrix - n x q covariate matrix
restricted_max_likelihood - use restricted maximum likelihood
refit - refit the variance component for each SNP
-
+
"""
if kinship_eigen_vals == None:
kinship_eigen_vals = []
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
index 1274fe50..e47c18e1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -1,28 +1,127 @@
from __future__ import absolute_import, print_function, division
+import sys
+sys.path.append("../../..")
+
+print("sys.path: ", sys.path)
+
+import numpy as np
+
+import zlib
import cPickle as pickle
import redis
Redis = redis.Redis()
-from wqflask.my_pylmm.pyLMM import lmm
-
-lmm_vars_pickled = Redis.get("lmm_vars")
-
+import lmm
-plink_pickled = Redis.lpop("plink_inputs")
+class ProcessLmmChunk(object):
+
+ def __init__(self):
+ self.get_snp_data()
+ self.get_lmm_vars()
+
+ keep = self.trim_matrices()
+
+ self.do_association(keep)
+
+ print("p_values is: ", self.p_values)
+
+ def get_snp_data(self):
+ plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+ plink_data = pickle.loads(plink_pickled)
+
+ self.snps = np.array(plink_data['result'])
+ self.identifier = plink_data['identifier']
+
+ def get_lmm_vars(self):
+ lmm_vars_pickled = Redis.hget(self.identifier, "lmm_vars")
+ lmm_vars = pickle.loads(lmm_vars_pickled)
+
+ self.pheno_vector = np.array(lmm_vars['pheno_vector'])
+ self.covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+ self.kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+
+ def trim_matrices(self):
+ v = np.isnan(self.pheno_vector)
+ keep = True - v
+ keep = keep.reshape((len(keep),))
+
+ if v.sum():
+ self.pheno_vector = self.pheno_vector[keep]
+ self.covariate_matrix = self.covariate_matrix[keep,:]
+ self.kinship_matrix = self.kinship_matrix[keep,:][:,keep]
-plink_data = pickle.loads(plink_pickled)
+ return keep
+
+ def do_association(self, keep):
+ n = self.kinship_matrix.shape[0]
+ lmm_ob = lmm.LMM(self.pheno_vector,
+ self.kinship_matrix,
+ self.covariate_matrix)
+ lmm_ob.fit()
+
+ self.p_values = []
+
+ for snp in self.snps:
+ snp = snp[0]
+ p_value, t_stat = lmm.human_association(snp,
+ n,
+ keep,
+ lmm_ob,
+ self.pheno_vector,
+ self.covariate_matrix,
+ self.kinship_matrix,
+ False)
+
+ self.p_values.append(p_value)
+
-identifier = plink_data['identifier']
-print("identifier: ", identifier)
+#plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+#
+#plink_data = pickle.loads(plink_pickled)
+#result = np.array(plink_data['result'])
+#print("snp size is: ", result.shape)
+#identifier = plink_data['identifier']
+#
+#lmm_vars_pickled = Redis.hget(identifier, "lmm_vars")
+#lmm_vars = pickle.loads(lmm_vars_pickled)
+#
+#pheno_vector = np.array(lmm_vars['pheno_vector'])
+#covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+#kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+#
+#v = np.isnan(pheno_vector)
+#keep = True - v
+#keep = keep.reshape((len(keep),))
+#print("keep is: ", keep)
+#
+#if v.sum():
+# pheno_vector = pheno_vector[keep]
+# covariate_matrix = covariate_matrix[keep,:]
+# kinship_matrix = kinship_matrix[keep,:][:,keep]
+#
+#n = kinship_matrix.shape[0]
+#print("n is: ", n)
+#lmm_ob = lmm.LMM(pheno_vector,
+# kinship_matrix,
+# covariate_matrix)
+#lmm_ob.fit()
+#
+#p_values = []
+#
+#for snp in result:
+# snp = snp[0]
+# p_value, t_stat = lmm.human_association(snp,
+# n,
+# keep,
+# lmm_ob,
+# pheno_vector,
+# covariate_matrix,
+# kinship_matrix,
+# False)
+#
+# p_values.append(p_value)
+
-ps, ts = lmm.human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index dc872a8b..89f146b3 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -26,8 +26,7 @@ from MySQLdb import escape_string as escape
from htmlgen import HTMLgen2 as HT
from base import webqtlConfig
-from utility.THCell import THCell
-from utility.TDCell import TDCell
+from utility.benchmark import Bench
from base.data_set import create_dataset
from base.trait import GeneralTrait
from wqflask import parser
@@ -124,13 +123,16 @@ class SearchResultPage(object):
FROM QuickSearch
WHERE MATCH (terms)
AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms)
- dbresults = g.db.execute(query, no_parameters=True).fetchall()
+ #print("query is: ", query)
+
+ with Bench("Doing QuickSearch Query: "):
+ dbresults = g.db.execute(query, no_parameters=True).fetchall()
#print("results: ", pf(results))
self.results = collections.defaultdict(list)
type_dict = {'PublishXRef': 'phenotype',
- 'ProbesetXRef': 'mrna_assay',
+ 'ProbeSetXRef': 'mrna_assay',
'GenoXRef': 'genotype'}
for dbresult in dbresults:
@@ -141,7 +143,7 @@ class SearchResultPage(object):
self.results[type_dict[dbresult.table_name]].append(this_result)
- print("results: ", pf(self.results['phenotype']))
+ #print("results: ", pf(self.results['phenotype']))
#def quick_search(self):
# self.search_terms = parser.parse(self.search_terms)
@@ -209,6 +211,6 @@ class SearchResultPage(object):
self.dataset,
)
self.results.extend(the_search.run())
- print("in the search results are:", self.results)
+ #print("in the search results are:", self.results)
self.header_fields = the_search.header_fields
diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html
index d50b4937..769c40e6 100644
--- a/wqflask/wqflask/templates/quick_search.html
+++ b/wqflask/wqflask/templates/quick_search.html
@@ -42,9 +42,11 @@
-
+ {% endfor %}
+ #}
Id |
Species |
Group |
--
cgit v1.2.3
From ad95e4f03ea5c5f7ee4a7eca1b001a45666f637c Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Tue, 7 May 2013 23:26:55 +0000
Subject: Made some progress adding species/group tabs to the quick search
result page to make the results easier to navigate
---
wqflask/wqflask/search_results.py | 21 +-
wqflask/wqflask/templates/quick_search.html | 323 +++++++++++++++++++---------
2 files changed, 235 insertions(+), 109 deletions(-)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 89f146b3..5f3c036f 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -74,6 +74,7 @@ class SearchResultPage(object):
self.search_terms = kw['q']
print("self.search_terms is: ", self.search_terms)
self.quick_search()
+ self.get_group_species_tree()
else:
self.results = []
#self.quick_search = False
@@ -134,16 +135,32 @@ class SearchResultPage(object):
type_dict = {'PublishXRef': 'phenotype',
'ProbeSetXRef': 'mrna_assay',
'GenoXRef': 'genotype'}
-
+
+ self.species_groups = {}
for dbresult in dbresults:
this_result = {}
this_result['table_name'] = dbresult.table_name
this_result['key'] = dbresult.the_key
this_result['result_fields'] = json.loads(dbresult.result_fields)
-
+ this_species = this_result['result_fields']['species']
+ this_group = this_result['result_fields']['group_name']
+ if type_dict[dbresult.table_name] not in self.species_groups:
+ self.species_groups[type_dict[dbresult.table_name]] = {}
+ if this_species not in self.species_groups[type_dict[dbresult.table_name]]:
+ self.species_groups[type_dict[dbresult.table_name]][this_species] = collections.defaultdict(list)
+ if this_group not in self.species_groups[type_dict[dbresult.table_name]][this_species]:
+ self.species_groups[type_dict[dbresult.table_name]][this_species].append(this_group)
self.results[type_dict[dbresult.table_name]].append(this_result)
#print("results: ", pf(self.results['phenotype']))
+
+ #def get_group_species_tree(self):
+ # self.species_groups = collections.default_dict(list)
+ # for key in self.results:
+ # for item in self.results[key]:
+ # self.species_groups[item['result_fields']['species']].append(
+ # item['result_fields']['group_name'])
+
#def quick_search(self):
# self.search_terms = parser.parse(self.search_terms)
diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html
index 769c40e6..9d5b0c74 100644
--- a/wqflask/wqflask/templates/quick_search.html
+++ b/wqflask/wqflask/templates/quick_search.html
@@ -21,9 +21,7 @@
{% if search_terms %}
-
- {% for word in search_terms %}
- {{word}} {% if not loop.last %} or {% endif %}
- {% endfor %}
+ {{search_terms}}
{% endif %}
@@ -33,20 +31,166 @@
+ {% if results.phenotype %}
- Phenotype
+ {% endif %}
+ {% if results.mrna_assay %}
- mRNA Assay
+ {% endif %}
+ {% if results.genotype %}
- Genotype
+ {% endif %}
-
+
+
+ {% for species in species_groups.phenotype %}
+ - {{ species }}
+ {% endfor %}
+
+
+ {% for species in species_groups.phenotype %}
+
+
+
+
+ Id |
+ Species |
+ Group |
+ Description |
+ LRS |
+ Year |
+ Authors |
+
+
+
+ {% for result in results.phenotype %}
+ {% if result.result_fields['species'] == species %}
+
+ {{ result.result_fields['phenotype_id'] }} |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['lrs'] }} |
+
+
+ {{ result.result_fields['year'] }}
+
+ |
+ {{ result.result_fields['authors'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endfor %}
+
+
+
+
+
+
+ {% for species in species_groups.mrna_assay %}
+ - {{ species }}
+ {% endfor %}
+
+
+ {% for species in species_groups.mrna_assay %}
+
+
+
+
+ Record ID |
+ Species |
+ Group |
+ Data Set |
+ Symbol |
+ Description |
+ Location |
+ Mean Expr |
+ Max LRS |
+
+
+
+ {% for result in results.mrna_assay %}
+ {% if result.result_fields['species'] == species %}
+
+
+
+ |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['symbol'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['chr'] }} : {{ result['mb'] }} |
+ {{ result.result_fields['mean'] }} |
+ {{ result.result_fields['lrs'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endfor %}
+
+
+
+
+
+
+ {% for species in species_groups.genotype %}
+ - {{ species }}
+ {% endfor %}
+
+
+ {% for species in species_groups.genotype %}
+
+
+
+
+ Marker |
+ Species |
+ Group |
+ Data Set |
+ Location |
+
+
+
+ {% for result in results.genotype %}
+ {% if result.result_fields['species'] == species %}
+
+
+
+ {{ result.result_fields['marker_name'] }}
+
+ |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['chr'] }} : {{ result.result_fields['mb'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endfor %}
+
+
+
+
+
+
+
+ {#
+
- {#
- {% for key, _value in results.phenotype[0].result_fields.items() %}
- {{key}} |
- {% endfor %}
- #}
Id |
Species |
Group |
@@ -59,26 +203,24 @@
{% for result in results.phenotype %}
- {% for result in result.result_fields.items() %}
- {{ result['phenotype_id'] }} |
- {{ result['species'] }} |
- {{ result['group_name'] }} |
- {{ result['description'] }} |
- {{ result['lrs'] }} |
+ {{ result.result_fields['phenotype_id'] }} |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['lrs'] }} |
-
- {{ result['year'] }}
+
+ {{ result.result_fields['year'] }}
|
- {{ result['authors'] }} |
- {% endfor %}
+ {{ result.result_fields['authors'] }} |
{% endfor %}
-
+
Record ID |
@@ -95,28 +237,26 @@
{% for result in results.mrna_assay %}
- {% for result in result.result_fields.items() %}
-
|
- {{ result['species'] }} |
- {{ result['group_name'] }} |
- {{ result['dataset_name'] }} |
- {{ result['symbol'] }} |
- {{ result['description'] }} |
- {{ result['chr'] }} : {{ result['mb'] }} |
- {{ result['mean'] }} |
- {{ result['lrs'] }} |
- {% endfor %}
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['symbol'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['chr'] }} : {{ result['mb'] }} |
+ {{ result.result_fields['mean'] }} |
+ {{ result.result_fields['lrs'] }} |
{% endfor %}
-
+
Marker |
@@ -127,91 +267,60 @@
- {% for result in results.mrna_assay %}
+ {% for result in results.genotype %}
- {% for result in result.result_fields.items() %}
-
- {{ result['marker_name'] }}
+
+ {{ result.result_fields['marker_name'] }}
|
- {{ result['species'] }} |
- {{ result['group_name'] }} |
- {{ result['dataset_name'] }} |
- {{ result['chr'] }} : {{ result['mb'] }} |
- {% endfor %}
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['chr'] }} : {{ result.result_fields['mb'] }} |
{% endfor %}
-
-
-
-
-
-
+
-
+{% endblock %}
-
+{% block js %}
+
+
+
+
+
+
+
{% endblock %}
--
cgit v1.2.3
From 5a3f413da480123e3ad943b5f556e0a557f185cc Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 9 May 2013 22:54:34 +0000
Subject: Just added some print statements so I can show matrix/vector shapes
to Tony
---
wqflask/base/data_set.py | 2 +-
.../wqflask/marker_regression/marker_regression.py | 5 +++--
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 24 +++++++++++++++-------
3 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 1520b180..d7328441 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -385,7 +385,7 @@ class PhenotypeDataSet(DataSet):
continue # for now
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
description = this_trait.pre_publication_description
- this_trait.description_display = description
+ this_trait.description_display = unicode(description, "utf8")
if not this_trait.year.isdigit():
this_trait.pubmed_text = "N/A"
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 6ae1318e..334ce631 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -78,8 +78,9 @@ class MarkerRegression(object):
genotype_matrix = np.array(trimmed_genotype_data).T
- print("pheno_vector is: ", pf(pheno_vector))
- print("genotype_matrix is: ", pf(genotype_matrix))
+ print("pheno_vector: ", pf(pheno_vector))
+ print("genotype_matrix: ", pf(genotype_matrix))
+ print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
t_stats, p_values = lmm.run(
pheno_vector,
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index fc021a0b..62fb0fbd 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -58,6 +58,10 @@ def run_human(pheno_vector,
identifier = str(uuid.uuid4())
+ print("pheno_vector: ", pf(pheno_vector))
+ print("kinship_matrix: ", pf(kinship_matrix))
+ print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
+
lmm_vars = pickle.dumps(dict(
pheno_vector = pheno_vector,
covariate_matrix = covariate_matrix,
@@ -70,12 +74,12 @@ def run_human(pheno_vector,
pheno_vector = pheno_vector[keep]
#print("pheno_vector shape is now: ", pf(pheno_vector.shape))
covariate_matrix = covariate_matrix[keep,:]
- print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
+ #print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
#print("len(keep) is: ", pf(keep.shape))
kinship_matrix = kinship_matrix[keep,:][:,keep]
n = kinship_matrix.shape[0]
- print("n is:", n)
+ #print("n is:", n)
lmm_ob = LMM(pheno_vector,
kinship_matrix,
covariate_matrix)
@@ -86,7 +90,7 @@ def run_human(pheno_vector,
p_values = []
t_stats = []
- print("input_file: ", plink_input_file)
+ #print("input_file: ", plink_input_file)
with Bench("Opening and loading pickle file"):
with gzip.open(plink_input_file, "rb") as input_file:
@@ -103,6 +107,8 @@ def run_human(pheno_vector,
with Bench("Create list of inputs"):
inputs = list(plink_input)
+
+ print("len(genotypes): ", len(inputs))
with Bench("Divide into chunks"):
results = chunks.divide_into_chunks(inputs, 64)
@@ -116,7 +122,7 @@ def run_human(pheno_vector,
timestamp = datetime.datetime.utcnow().isoformat()
- print("Starting adding loop")
+ #print("Starting adding loop")
for part, result in enumerate(results):
#data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
holder = pickle.dumps(dict(
@@ -126,10 +132,10 @@ def run_human(pheno_vector,
result = result
), pickle.HIGHEST_PROTOCOL)
- print("Adding:", part)
+ #print("Adding:", part)
Redis.rpush(key, zlib.compress(holder))
- print("End adding loop")
- print("***** Added to {} queue *****".format(key))
+ #print("End adding loop")
+ #print("***** Added to {} queue *****".format(key))
for snp, this_id in plink_input:
#with Bench("part before association"):
if count > 2000:
@@ -157,6 +163,10 @@ def run_human(pheno_vector,
return p_values, t_stats
+#class HumanAssociation(object):
+# def __init__(self):
+#
+
def human_association(snp,
n,
keep,
--
cgit v1.2.3
From 04b61737236b837e91355b66cbaab3549bc39140 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 16 May 2013 23:31:54 +0000
Subject: Made the first level of quick search tabs species, followed by trait
type (was the opposite before Rob commented on it earlier today)
---
wqflask/wqflask/search_results.py | 19 ++++---
wqflask/wqflask/templates/quick_search.html | 86 +++++++++++------------------
2 files changed, 45 insertions(+), 60 deletions(-)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 5f3c036f..9b2751e0 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -74,7 +74,6 @@ class SearchResultPage(object):
self.search_terms = kw['q']
print("self.search_terms is: ", self.search_terms)
self.quick_search()
- self.get_group_species_tree()
else:
self.results = []
#self.quick_search = False
@@ -144,12 +143,18 @@ class SearchResultPage(object):
this_result['result_fields'] = json.loads(dbresult.result_fields)
this_species = this_result['result_fields']['species']
this_group = this_result['result_fields']['group_name']
- if type_dict[dbresult.table_name] not in self.species_groups:
- self.species_groups[type_dict[dbresult.table_name]] = {}
- if this_species not in self.species_groups[type_dict[dbresult.table_name]]:
- self.species_groups[type_dict[dbresult.table_name]][this_species] = collections.defaultdict(list)
- if this_group not in self.species_groups[type_dict[dbresult.table_name]][this_species]:
- self.species_groups[type_dict[dbresult.table_name]][this_species].append(this_group)
+ if this_species not in self.species_groups:
+ self.species_groups[this_species] = {}
+ if type_dict[dbresult.table_name] not in self.species_groups[this_species]:
+ self.species_groups[this_species][type_dict[dbresult.table_name]] = []
+ if this_group not in self.species_groups[this_species][type_dict[dbresult.table_name]]:
+ self.species_groups[this_species][type_dict[dbresult.table_name]].append(this_group)
+ #if type_dict[dbresult.table_name] not in self.species_groups:
+ # self.species_groups[type_dict[dbresult.table_name]] = {}
+ #if this_species not in self.species_groups[type_dict[dbresult.table_name]]:
+ # self.species_groups[type_dict[dbresult.table_name]][this_species] = []
+ #if this_group not in self.species_groups[type_dict[dbresult.table_name]][this_species]:
+ # self.species_groups[type_dict[dbresult.table_name]][this_species].append(this_group)
self.results[type_dict[dbresult.table_name]].append(this_result)
#print("results: ", pf(self.results['phenotype']))
diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html
index 9d5b0c74..5877a840 100644
--- a/wqflask/wqflask/templates/quick_search.html
+++ b/wqflask/wqflask/templates/quick_search.html
@@ -25,33 +25,31 @@
{% endif %}
-
- To study a record, click on its ID below.
- Check records below and click Add button to add to selection.
-
+
- {% if results.phenotype %}
- - Phenotype
- {% endif %}
- {% if results.mrna_assay %}
- - mRNA Assay
- {% endif %}
- {% if results.genotype %}
- - Genotype
- {% endif %}
+ {% for species in species_groups %}
+ - {{ species }}
+ {% endfor %}
-
-
+ {% for species in species_groups %}
+
+
- {% for species in species_groups.phenotype %}
- - {{ species }}
- {% endfor %}
+ {% if species_groups[species]['phenotype'] %}
+ - Phenotype
+ {% endif %}
+ {% if species_groups[species]['mrna_assay'] %}
+ - mRNA Assay
+ {% endif %}
+ {% if species_groups[species]['genotype'] %}
+ - Genotype
+ {% endif %}
- {% for species in species_groups.phenotype %}
-
+ {% if species_groups[species]['phenotype'] %}
+
- {% endfor %}
-
-
-
-
-
-
- {% for species in species_groups.mrna_assay %}
- - {{ species }}
- {% endfor %}
-
-
- {% for species in species_groups.mrna_assay %}
-
+ {% endif %}
+ {% if species_groups[species]['mrna_assay'] %}
+
- {% endfor %}
-
-
-
-
-
-
- {% for species in species_groups.genotype %}
- - {{ species }}
- {% endfor %}
-
-
- {% for species in species_groups.genotype %}
-
+ {% endif %}
+ {% if species_groups[species]['genotype'] %}
+
- {% endfor %}
+ {% endif %}
+ {% endfor %}
-
+
+
+
+{% endblock %}
+
{#
@@ -285,9 +266,6 @@
#}
-
-
-{% endblock %}
{% block js %}
@@ -299,6 +277,8 @@
@@ -274,9 +175,14 @@
-
@@ -175,16 +275,9 @@
+
{% endblock %}
+
diff --git a/wqflask/wqflask/templates/show_trait_details.html b/wqflask/wqflask/templates/show_trait_details.html
index c3abfc9f..b57c3c21 100644
--- a/wqflask/wqflask/templates/show_trait_details.html
+++ b/wqflask/wqflask/templates/show_trait_details.html
@@ -19,11 +19,11 @@
BLAT Specifity
- {{ "%.1f" % (this_trait.probe_set_specificity) }}
+ {{ "%s" % (this_trait.probe_set_specificity) }}
{% endif %}
{% if this_trait.probe_set_blat_score %}
BLAT Score
- {{ "%i" % (this_trait.probe_set_blat_score) }}
+ {{ "%s" % (this_trait.probe_set_blat_score) }}
{% endif %}
--
cgit v1.2.3
From aac1dd2f9c5b216b24c6e35676ba5d50f9d5d3c2 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 13 Jun 2013 20:15:30 +0000
Subject: Put the template html for the quick search page when all trait types
are displayed in a separate file that is included in quick_search.html
---
wqflask/wqflask/templates/all_results.html | 134 +++++++++++++++++++++++++++++
1 file changed, 134 insertions(+)
create mode 100644 wqflask/wqflask/templates/all_results.html
diff --git a/wqflask/wqflask/templates/all_results.html b/wqflask/wqflask/templates/all_results.html
new file mode 100644
index 00000000..a42e42d8
--- /dev/null
+++ b/wqflask/wqflask/templates/all_results.html
@@ -0,0 +1,134 @@
+
+ {% for species in species_groups %}
+ - {{ species }}
+ {% endfor %}
+
+
+ {% for species in species_groups %}
+
+
+
+ {% if species_groups[species]['phenotype'] %}
+ - Phenotype
+ {% endif %}
+ {% if species_groups[species]['mrna_assay'] %}
+ - mRNA Assay
+ {% endif %}
+ {% if species_groups[species]['genotype'] %}
+ - Genotype
+ {% endif %}
+
+
+ {% if species_groups[species]['phenotype'] %}
+
+
+
+
+ Id |
+ Species |
+ Group |
+ Description |
+ LRS |
+ Year |
+ Authors |
+
+
+
+ {% for result in results.phenotype %}
+ {% if result.result_fields['species'] == species %}
+
+ {{ result.result_fields['phenotype_id'] }} |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['lrs'] }} |
+
+
+ {{ result.result_fields['year'] }}
+
+ |
+ {{ result.result_fields['authors'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endif %}
+ {% if species_groups[species]['mrna_assay'] %}
+
+
+
+
+ Record ID |
+ Species |
+ Group |
+ Data Set |
+ Symbol |
+ Description |
+ Location |
+ Mean Expr |
+ Max LRS |
+
+
+
+ {% for result in results.mrna_assay %}
+ {% if result.result_fields['species'] == species %}
+
+
+
+ |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['symbol'] }} |
+ {{ result.result_fields['description'] }} |
+ {{ result.result_fields['chr'] }} : {{ result['mb'] }} |
+ {{ result.result_fields['mean'] }} |
+ {{ result.result_fields['lrs'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endif %}
+ {% if species_groups[species]['genotype'] %}
+
+
+
+
+ Marker |
+ Species |
+ Group |
+ Data Set |
+ Location |
+
+
+
+ {% for result in results.genotype %}
+ {% if result.result_fields['species'] == species %}
+
+
+
+ {{ result.result_fields['marker_name'] }}
+
+ |
+ {{ result.result_fields['species'] }} |
+ {{ result.result_fields['group_name'] }} |
+ {{ result.result_fields['dataset_name'] }} |
+ {{ result.result_fields['chr'] }} : {{ result.result_fields['mb'] }} |
+
+ {% endif %}
+ {% endfor %}
+
+
+
+ {% endif %}
+
+
+
+ {% endfor %}
+
\ No newline at end of file
--
cgit v1.2.3