From 4e6722beb4302bf22719ad783443767bb1ba7f6a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 24 Apr 2013 19:35:55 +0000 Subject: Fixed issue with the part of quick_search_table.py that loads phenotype traits --- wqflask/maintenance/quick_search_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 046a05c4..9ea8d2a3 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -136,7 +136,7 @@ class PublishXRef(Base): "Phenotype.Original_description as description, " "PublishXRef.LRS as lrs, " "PublishXRef.PublicationId as publication_id, " - "Publication.PubMed_ID" + "Publication.PubMed_ID, " "Publication.Year as year, " "Publication.Authors as authors " "FROM PublishXRef, " -- cgit v1.2.3 From 23dbf30e4c96faae3fda19f539be2a8b260dc957 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 24 Apr 2013 19:56:39 +0000 Subject: Checking in quick_search_table.py before using pylin --- wqflask/maintenance/quick_search_table.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 9ea8d2a3..aa40badf 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -136,7 +136,7 @@ class PublishXRef(Base): "Phenotype.Original_description as description, " "PublishXRef.LRS as lrs, " "PublishXRef.PublicationId as publication_id, " - "Publication.PubMed_ID, " + "Publication.PubMed_ID as pubmed_id, " "Publication.Year as year, " "Publication.Authors as authors " "FROM PublishXRef, " @@ -450,11 +450,12 @@ QuickSearch = sa.Table("QuickSearch", Metadata, sa.Column('the_key', sa.String(30), primary_key=True, nullable=False, autoincrement=False), # key in database table sa.Column('terms', sa.Text), # terms to compare search string with - sa.Column('result_fields', sa.Text) # json + sa.Column('result_fields', sa.Text), # json + mysql_engine = 'MyISAM', ) -QuickSearch.drop(Engine, checkfirst=True) -Metadata.create_all(Engine) +#QuickSearch.drop(Engine, checkfirst=True) +#Metadata.create_all(Engine) def row2dict(row): @@ -476,8 +477,8 @@ def page_query(q): def main(): - ProbeSetXRef.run() - GenoXRef.run() + #ProbeSetXRef.run() + #GenoXRef.run() PublishXRef.run() if __name__ == "__main__": -- cgit v1.2.3 From 00f20d2d650c44794a8d21aad45d748e9aa0365b Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 24 Apr 2013 23:30:18 +0000 Subject: Began writing code that writes some objects used in LMM to redis and writes/reads eachitem in the plink_input --- misc/notes.txt | 8 ++ wqflask/maintenance/quick_search_table.py | 124 ++++++++++++++---------- wqflask/other_config/pylintrc | 6 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 38 ++++++-- wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 24 +++++ 5 files changed, 136 insertions(+), 64 deletions(-) create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/process_plink.py (limited to 'wqflask/maintenance') diff --git a/misc/notes.txt b/misc/notes.txt index 6bdcccf3..5d4bcaeb 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -106,6 +106,14 @@ sudo /etc/init.d/redis_6379 start =========================================== +Run pylint: +pylint file_name + +For options: +pylint --help + +=========================================== + Start screen session: If "no option -R": diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index aa40badf..4f2cd8a9 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -1,17 +1,17 @@ -from __future__ import print_function, division, absolute_import +"""Creates a table used for the quick search feature. + +One column contains the terms to match the user's search against. Another contains the result +fields in json format -""" Results will be returned for each of several trait types: mRNA assays, phenotypes, genotypes, and (maybe later) genes -For each trait type, the results for each species should be given; for example, have a "Mouse" tab -with the mouse traits in a table inside it - -This table will then list each trait, its dataset, and several columns determined by its trait type -(phenotype, genotype, etc) +For each trait type, the results for each species should be given This table will then list +each trait, its dataset, and several columns determined by its trait type (phenotype, genotype, etc) """ +from __future__ import print_function, division, absolute_import import sys sys.path.append("../../..") @@ -19,19 +19,17 @@ sys.path.append("../../..") import simplejson as json import sqlalchemy as sa -from sqlalchemy.dialects import mysql -from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref -from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.orm import scoped_session, sessionmaker from sqlalchemy.ext.declarative import declarative_base -from pprint import pformat as pf +#from pprint import pformat as pf import zach_settings as settings Engine = sa.create_engine(settings.SQLALCHEMY_DATABASE_URI, #encoding='utf-8', #client_encoding='utf-8', - #echo="debug", + #echo="debug",w ) Session = scoped_session(sessionmaker(bind=Engine)) #, extension=VersionedListener())) @@ -41,6 +39,14 @@ Metadata = sa.MetaData() Metadata.bind = Engine class PublishXRef(Base): + """Class that corresponds with the PublishXRef table in the database. + + The PublishXRef table links phenotype traits and their publications. + + This class is used to add phenotype traits to the quick search table. + + """ + __tablename__ = 'PublishXRef' Id = sa.Column(sa.Integer, primary_key=True) @@ -56,15 +62,16 @@ class PublishXRef(Base): @classmethod def run(cls): + """Connects to database and inserts phenotype trait info into the Quicksearch table.""" conn = Engine.connect() counter = 0 - for ps in page_query(Session.query(cls)): #all() + for pub_row in page_query(Session.query(cls)): #all() values = {} values['table_name'] = cls.__tablename__ - values['the_key'] = json.dumps([ps.Id, ps.InbredSetId]) - values['terms'] = cls.get_unique_terms(ps.Id, ps.InbredSetId) + values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId]) + values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId) print("terms is:", values['terms']) - values['result_fields'] = cls.get_result_fields(ps.Id, ps.InbredSetId) + values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) ins = QuickSearch.insert().values(**values) conn.execute(ins) counter += 1 @@ -72,6 +79,7 @@ class PublishXRef(Base): @staticmethod def get_unique_terms(publishxref_id, inbredset_id): + """Finds unique terms for each item in the PublishXRef table to match a query against""" results = Session.query( "pre_publication_description", "post_publication_description", @@ -119,6 +127,7 @@ class PublishXRef(Base): @staticmethod def get_result_fields(publishxref_id, inbredset_id): + """Gets the result fields (columns) that appear on the result page as a json string""" results = Session.query( "phenotype_id", "species", @@ -150,17 +159,10 @@ class PublishXRef(Base): "PublishXRef.PublicationId = Publication.Id and " "InbredSet.Id = :inbredset_id and " "Species.Id = InbredSet.SpeciesId ").params(publishxref_id=publishxref_id, - inbredset_id=inbredset_id).all() - #"InbredSet.SpeciesId = Species.Id and " - #"Geno.SpeciesId = Species.Id and " - #"Geno.Name = PublishXRef.Locus ").params(publishxref_id=publishxref_id, - # inbredset_id=inbredset_id).all() - #for result in results: - # print("****", result) + inbredset_id=inbredset_id).all() assert len(set(result for result in results)) == 1, "Different results or no results" - #print("results are:", results) result = results[0] result = row2dict(result) try: @@ -176,7 +178,16 @@ class PublishXRef(Base): return json_results + class GenoXRef(Base): + """Class that corresponds with the GenoXRef table in the database. + + The GenoXRef table links genotype traits and their data. + + This class is used to add genotype traits to the quick search table. + + """ + __tablename__ = 'GenoXRef' GenoFreezeId = sa.Column(sa.Integer, primary_key=True) @@ -187,6 +198,7 @@ class GenoXRef(Base): @classmethod def run(cls): + """Connects to database and inserts genotype trait info into the Quicksearch table.""" conn = Engine.connect() counter = 0 for item in page_query(Session.query(cls)): #all() @@ -201,9 +213,10 @@ class GenoXRef(Base): conn.execute(ins) counter += 1 print("Done:", counter) - + @staticmethod def get_unique_terms(geno_id): + """Finds unique terms for each item in the GenoXRef table to match a query against""" print("geno_id: ", geno_id) results = Session.query( "name", @@ -214,8 +227,6 @@ class GenoXRef(Base): "FROM Geno " "WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all() - #print("results: ", pf(results)) - unique = set() if len(results): for item in results[0]: @@ -234,12 +245,12 @@ class GenoXRef(Base): continue unique.add(token) - #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @staticmethod def get_result_fields(geno_id, dataset_id): + """Gets the result fields (columns) that appear on the result page as a json string""" results = Session.query( "name", "marker_name", @@ -271,11 +282,8 @@ class GenoXRef(Base): "InbredSet.Id = GenoFreeze.InbredSetId and " "InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id, dataset_id=dataset_id).all() - #for result in results: - # print(result) assert len(set(result for result in results)) == 1, "Different results" - #print("results are:", results) result = results[0] result = row2dict(result) try: @@ -292,6 +300,14 @@ class GenoXRef(Base): return json_results class ProbeSetXRef(Base): + """Class that corresponds with the ProbeSetXRef table in the database. + + The ProbeSetXRef table links mRNA expression traits and their sample data. + + This class is used to add mRNA expression traits to the quick search table. + + """ + __tablename__ = 'ProbeSetXRef' ProbeSetFreezeId = sa.Column(sa.Integer, primary_key=True) @@ -310,16 +326,17 @@ class ProbeSetXRef(Base): @classmethod def run(cls): + """Connects to db and inserts mRNA expression trait info into the Quicksearch table.""" conn = Engine.connect() counter = 0 - for ps in page_query(Session.query(cls)): #all() + for ps_row in page_query(Session.query(cls)): #all() values = {} values['table_name'] = cls.__tablename__ - values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId]) - values['terms'] = cls.get_unique_terms(ps.ProbeSetId) + values['the_key'] = json.dumps([ps_row.ProbeSetId, ps_row.ProbeSetFreezeId]) + values['terms'] = cls.get_unique_terms(ps_row.ProbeSetId) print("terms is:", values['terms']) - #values['species'] = get_species("ProbeSet", ps.Id) - values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId) + values['result_fields'] = cls.get_result_fields(ps_row.ProbeSetId, + ps_row.ProbeSetFreezeId) if values['result_fields'] == None: continue ins = QuickSearch.insert().values(**values) @@ -329,6 +346,7 @@ class ProbeSetXRef(Base): @staticmethod def get_unique_terms(probeset_id): + """Finds unique terms for each item in the ProbeSetXRef table to match a query against""" results = Session.query( "name", "symbol", @@ -345,7 +363,6 @@ class ProbeSetXRef(Base): unique = set() if len(results): for item in results[0]: - #print("locals:", locals()) if not item: continue for token in item.split(): @@ -366,12 +383,12 @@ class ProbeSetXRef(Base): continue unique.add(token) - #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @staticmethod def get_result_fields(probeset_id, dataset_id): + """Gets the result fields (columns) that appear on the result page as a json string""" print("probeset_id: ", probeset_id) print("dataset_id: ", dataset_id) results = Session.query( @@ -420,14 +437,10 @@ class ProbeSetXRef(Base): "ProbeFreeze.InbredSetId = InbredSet.Id and " "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id, dataset_id=dataset_id).all() - #for result in results: - # print("-", result) if len(set(result for result in results)) != 1: return None - #assert len(set(result for result in results)) == 1, "Different results" - - #print("results are:", results) + result = results[0] result = row2dict(result) try: @@ -443,10 +456,10 @@ class ProbeSetXRef(Base): return json_results - QuickSearch = sa.Table("QuickSearch", Metadata, + # table_name is the table that item is inserted from sa.Column('table_name', sa.String(15), - primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from + primary_key=True, nullable=False, autoincrement=False), sa.Column('the_key', sa.String(30), primary_key=True, nullable=False, autoincrement=False), # key in database table sa.Column('terms', sa.Text), # terms to compare search string with @@ -459,24 +472,29 @@ QuickSearch = sa.Table("QuickSearch", Metadata, def row2dict(row): - """http://stackoverflow.com/a/2848519/1175849""" + """From http://stackoverflow.com/a/2848519/1175849""" return dict(zip(row.keys(), row)) -def page_query(q): - """http://stackoverflow.com/a/1217947/1175849""" +def page_query(query): + """From http://stackoverflow.com/a/1217947/1175849""" offset = 0 while True: - r = False - for elem in q.limit(1000).offset(offset): - r = True - yield elem + rrr = False + for elem in query.limit(1000).offset(offset): + rrr = True + yield elem offset += 1000 - if not r: + if not rrr: break def main(): + """Populate the QuickSearch table that is used with the quick search features. + + Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. + + """ #ProbeSetXRef.run() #GenoXRef.run() PublishXRef.run() diff --git a/wqflask/other_config/pylintrc b/wqflask/other_config/pylintrc index 36d2bddf..b23af7a4 100644 --- a/wqflask/other_config/pylintrc +++ b/wqflask/other_config/pylintrc @@ -38,7 +38,7 @@ load-plugins= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -#disable= +disable=no-init, star-args, no-member, import-error [REPORTS] @@ -115,7 +115,7 @@ bad-functions=map,filter,apply,input module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression which should only match correct module level names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ +const-rgx=(([A-Z_][A-Za-z0-9_]*)|(__.*__))$ # Regular expression which should only match correct class names class-rgx=[A-Z_][a-zA-Z0-9]+$ @@ -153,7 +153,7 @@ no-docstring-rgx=__.*__ [FORMAT] # Maximum number of characters on a single line. -max-line-length=80 +max-line-length=100 # Maximum number of lines in a module max-module-lines=1000 diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index a3ba8fdb..10221a2e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -28,6 +28,7 @@ from scipy import stats import pdb import gzip +import datetime import cPickle as pickle import simplejson as json @@ -38,6 +39,9 @@ from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks +import redis +Redis = redis.Redis() + #np.seterr('raise') def run_human(pheno_vector, @@ -65,6 +69,7 @@ def run_human(pheno_vector, covariate_matrix) lmm_ob.fit() + # Buffers for pvalues and t-stats p_values = [] t_stats = [] @@ -86,19 +91,36 @@ def run_human(pheno_vector, with Bench("Create list of inputs"): inputs = list(plink_input) - + with Bench("Divide into chunks"): results = chunks.divide_into_chunks(inputs, 64) result_store = [] - identifier = uuid.uuid4() - for part, result in enumerate(results): - # todo: Don't use TempData here. Instead revert old one and store this stuff as a list - data_store = temp_data.TempData(identifier, "plink", part) - - data_store.store("data", pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) - result_store.append(data_store) + identifier = str(uuid.uuid4()) + + lmm_vars = pickle.dumps(dict( + pheno_vector = pheno_vector, + covariate_matrix = covariate_matrix, + kinship_matrix = kinship_matrix + )) + Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars)) + + key = "plink_inputs" + timestamp = datetime.datetime.utcnow().isoformat() + + for part, result in enumerate(results): + #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) + holder = pickle.dumps(dict( + identifier = identifier, + part = part, + timestamp = timestamp, + result = result + ), pickle.HIGHEST_PROTOCOL) + print("Adding:", part) + Redis.rpush(key, holder) + + print("***** Added to {} queue *****".format(key)) for snp, this_id in plink_input: with Bench("part before association"): if count > 2000: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py new file mode 100644 index 00000000..5ce7adbc --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py @@ -0,0 +1,24 @@ +from __future__ import absolute_import, print_function, division + +import cPickle as pickle +import redis +Redis = redis.Redis() + +from wqflask.my_pylmm.pyLMM import lmm + +lmm_vars_pickled = Redis.get("lmm_vars") + + +plink_pickled = Redis.lpop("plink_inputs") + +plink_data = pickle.loads(plink_pickled) + + +ps, ts = lmm.human_association(snp, + n, + keep, + lmm_ob, + pheno_vector, + covariate_matrix, + kinship_matrix, + refit) \ No newline at end of file -- cgit v1.2.3 From 275f9210a7af6a2de32e8e102de816276b5ef237 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 26 Apr 2013 16:21:38 +0000 Subject: Added file that checks how much time is added to a loop's runtime by print statements Continued work on file that processes each individual chunk of the snp iterator/plink input --- wqflask/maintenance/print_benchmark.py | 43 +++++++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 6 +++- 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 wqflask/maintenance/print_benchmark.py (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/print_benchmark.py b/wqflask/maintenance/print_benchmark.py new file mode 100644 index 00000000..540e0904 --- /dev/null +++ b/wqflask/maintenance/print_benchmark.py @@ -0,0 +1,43 @@ +#!/usr/bin/python + +from __future__ import absolute_import, print_function, division + +import time + +from pprint import pformat as pf + + +class TheCounter(object): + Counters = {} + + def __init__(self): + start_time = time.time() + for counter in range(170000): + self.print_it(counter) + self.time_took = time.time() - start_time + TheCounter.Counters[self.__class__.__name__] = self.time_took + +class PrintAll(TheCounter): + def print_it(self, counter): + print(counter) + +class PrintSome(TheCounter): + def print_it(self, counter): + if counter % 1000 == 0: + print(counter) + +class PrintNone(TheCounter): + def print_it(self, counter): + pass + + +def new_main(): + print("Running new_main") + tests = [PrintAll, PrintSome, PrintNone] + for test in tests: + test() + + print(pf(TheCounter.Counters)) + +if __name__ == '__main__': + new_main() \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py index 5ce7adbc..8f7ad243 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py @@ -13,6 +13,9 @@ plink_pickled = Redis.lpop("plink_inputs") plink_data = pickle.loads(plink_pickled) +identifier = plink_data['identifier'] +print("identifier: ", identifier) + ps, ts = lmm.human_association(snp, n, @@ -21,4 +24,5 @@ ps, ts = lmm.human_association(snp, pheno_vector, covariate_matrix, kinship_matrix, - refit) \ No newline at end of file + refit) + -- cgit v1.2.3 From dd3f7bb79d39252a987826a9825d00da782ba58a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 2 May 2013 22:58:18 +0000 Subject: Got quick search code running (but not displaying properly) Code that processes subset of snps works, in process of putting into a class --- wqflask/maintenance/quick_search_table.py | 4 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79 +++++++------- wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 131 +++++++++++++++++++++--- wqflask/wqflask/search_results.py | 14 +-- wqflask/wqflask/templates/quick_search.html | 6 +- 5 files changed, 173 insertions(+), 61 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 4f2cd8a9..b07e7656 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -495,9 +495,9 @@ def main(): Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. """ - #ProbeSetXRef.run() + ProbeSetXRef.run() #GenoXRef.run() - PublishXRef.run() + #PublishXRef.run() if __name__ == "__main__": main() \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 10221a2e..fc021a0b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -28,6 +28,7 @@ from scipy import stats import pdb import gzip +import zlib import datetime import cPickle as pickle import simplejson as json @@ -55,15 +56,26 @@ def run_human(pheno_vector, keep = True - v keep = keep.reshape((len(keep),)) + identifier = str(uuid.uuid4()) + + lmm_vars = pickle.dumps(dict( + pheno_vector = pheno_vector, + covariate_matrix = covariate_matrix, + kinship_matrix = kinship_matrix + )) + Redis.hset(identifier, "lmm_vars", lmm_vars) + Redis.expire(identifier, 60*60) + if v.sum(): pheno_vector = pheno_vector[keep] #print("pheno_vector shape is now: ", pf(pheno_vector.shape)) covariate_matrix = covariate_matrix[keep,:] - #print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) + print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) #print("len(keep) is: ", pf(keep.shape)) kinship_matrix = kinship_matrix[keep,:][:,keep] n = kinship_matrix.shape[0] + print("n is:", n) lmm_ob = LMM(pheno_vector, kinship_matrix, covariate_matrix) @@ -96,19 +108,15 @@ def run_human(pheno_vector, results = chunks.divide_into_chunks(inputs, 64) result_store = [] - identifier = str(uuid.uuid4()) - - lmm_vars = pickle.dumps(dict( - pheno_vector = pheno_vector, - covariate_matrix = covariate_matrix, - kinship_matrix = kinship_matrix - )) - Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars)) - key = "plink_inputs" + + # Todo: Delete below line when done testing + Redis.delete(key) + timestamp = datetime.datetime.utcnow().isoformat() + print("Starting adding loop") for part, result in enumerate(results): #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) holder = pickle.dumps(dict( @@ -117,33 +125,34 @@ def run_human(pheno_vector, timestamp = timestamp, result = result ), pickle.HIGHEST_PROTOCOL) + print("Adding:", part) - Redis.rpush(key, holder) - + Redis.rpush(key, zlib.compress(holder)) + print("End adding loop") print("***** Added to {} queue *****".format(key)) for snp, this_id in plink_input: - with Bench("part before association"): - if count > 2000: - break - count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - loading_progress.store("percent_complete", percent_complete) - - with Bench("actual association"): - ps, ts = human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit) - - with Bench("after association"): - p_values.append(ps) - t_stats.append(ts) + #with Bench("part before association"): + if count > 2000: + break + count += 1 + + percent_complete = (float(count) / total_snps) * 100 + #print("percent_complete: ", percent_complete) + loading_progress.store("percent_complete", percent_complete) + + #with Bench("actual association"): + ps, ts = human_association(snp, + n, + keep, + lmm_ob, + pheno_vector, + covariate_matrix, + kinship_matrix, + refit) + + #with Bench("after association"): + p_values.append(ps) + t_stats.append(ts) return p_values, t_stats @@ -326,7 +335,7 @@ def GWAS(pheno_vector, covariate_matrix - n x q covariate matrix restricted_max_likelihood - use restricted maximum likelihood refit - refit the variance component for each SNP - + """ if kinship_eigen_vals == None: kinship_eigen_vals = [] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py index 1274fe50..e47c18e1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py @@ -1,28 +1,127 @@ from __future__ import absolute_import, print_function, division +import sys +sys.path.append("../../..") + +print("sys.path: ", sys.path) + +import numpy as np + +import zlib import cPickle as pickle import redis Redis = redis.Redis() -from wqflask.my_pylmm.pyLMM import lmm - -lmm_vars_pickled = Redis.get("lmm_vars") - +import lmm -plink_pickled = Redis.lpop("plink_inputs") +class ProcessLmmChunk(object): + + def __init__(self): + self.get_snp_data() + self.get_lmm_vars() + + keep = self.trim_matrices() + + self.do_association(keep) + + print("p_values is: ", self.p_values) + + def get_snp_data(self): + plink_pickled = zlib.decompress(Redis.lpop("plink_inputs")) + plink_data = pickle.loads(plink_pickled) + + self.snps = np.array(plink_data['result']) + self.identifier = plink_data['identifier'] + + def get_lmm_vars(self): + lmm_vars_pickled = Redis.hget(self.identifier, "lmm_vars") + lmm_vars = pickle.loads(lmm_vars_pickled) + + self.pheno_vector = np.array(lmm_vars['pheno_vector']) + self.covariate_matrix = np.array(lmm_vars['covariate_matrix']) + self.kinship_matrix = np.array(lmm_vars['kinship_matrix']) + + def trim_matrices(self): + v = np.isnan(self.pheno_vector) + keep = True - v + keep = keep.reshape((len(keep),)) + + if v.sum(): + self.pheno_vector = self.pheno_vector[keep] + self.covariate_matrix = self.covariate_matrix[keep,:] + self.kinship_matrix = self.kinship_matrix[keep,:][:,keep] -plink_data = pickle.loads(plink_pickled) + return keep + + def do_association(self, keep): + n = self.kinship_matrix.shape[0] + lmm_ob = lmm.LMM(self.pheno_vector, + self.kinship_matrix, + self.covariate_matrix) + lmm_ob.fit() + + self.p_values = [] + + for snp in self.snps: + snp = snp[0] + p_value, t_stat = lmm.human_association(snp, + n, + keep, + lmm_ob, + self.pheno_vector, + self.covariate_matrix, + self.kinship_matrix, + False) + + self.p_values.append(p_value) + -identifier = plink_data['identifier'] -print("identifier: ", identifier) +#plink_pickled = zlib.decompress(Redis.lpop("plink_inputs")) +# +#plink_data = pickle.loads(plink_pickled) +#result = np.array(plink_data['result']) +#print("snp size is: ", result.shape) +#identifier = plink_data['identifier'] +# +#lmm_vars_pickled = Redis.hget(identifier, "lmm_vars") +#lmm_vars = pickle.loads(lmm_vars_pickled) +# +#pheno_vector = np.array(lmm_vars['pheno_vector']) +#covariate_matrix = np.array(lmm_vars['covariate_matrix']) +#kinship_matrix = np.array(lmm_vars['kinship_matrix']) +# +#v = np.isnan(pheno_vector) +#keep = True - v +#keep = keep.reshape((len(keep),)) +#print("keep is: ", keep) +# +#if v.sum(): +# pheno_vector = pheno_vector[keep] +# covariate_matrix = covariate_matrix[keep,:] +# kinship_matrix = kinship_matrix[keep,:][:,keep] +# +#n = kinship_matrix.shape[0] +#print("n is: ", n) +#lmm_ob = lmm.LMM(pheno_vector, +# kinship_matrix, +# covariate_matrix) +#lmm_ob.fit() +# +#p_values = [] +# +#for snp in result: +# snp = snp[0] +# p_value, t_stat = lmm.human_association(snp, +# n, +# keep, +# lmm_ob, +# pheno_vector, +# covariate_matrix, +# kinship_matrix, +# False) +# +# p_values.append(p_value) + -ps, ts = lmm.human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index dc872a8b..89f146b3 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -26,8 +26,7 @@ from MySQLdb import escape_string as escape from htmlgen import HTMLgen2 as HT from base import webqtlConfig -from utility.THCell import THCell -from utility.TDCell import TDCell +from utility.benchmark import Bench from base.data_set import create_dataset from base.trait import GeneralTrait from wqflask import parser @@ -124,13 +123,16 @@ class SearchResultPage(object): FROM QuickSearch WHERE MATCH (terms) AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms) - dbresults = g.db.execute(query, no_parameters=True).fetchall() + #print("query is: ", query) + + with Bench("Doing QuickSearch Query: "): + dbresults = g.db.execute(query, no_parameters=True).fetchall() #print("results: ", pf(results)) self.results = collections.defaultdict(list) type_dict = {'PublishXRef': 'phenotype', - 'ProbesetXRef': 'mrna_assay', + 'ProbeSetXRef': 'mrna_assay', 'GenoXRef': 'genotype'} for dbresult in dbresults: @@ -141,7 +143,7 @@ class SearchResultPage(object): self.results[type_dict[dbresult.table_name]].append(this_result) - print("results: ", pf(self.results['phenotype'])) + #print("results: ", pf(self.results['phenotype'])) #def quick_search(self): # self.search_terms = parser.parse(self.search_terms) @@ -209,6 +211,6 @@ class SearchResultPage(object): self.dataset, ) self.results.extend(the_search.run()) - print("in the search results are:", self.results) + #print("in the search results are:", self.results) self.header_fields = the_search.header_fields diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html index d50b4937..769c40e6 100644 --- a/wqflask/wqflask/templates/quick_search.html +++ b/wqflask/wqflask/templates/quick_search.html @@ -42,9 +42,11 @@ - + {% endfor %} + #} -- cgit v1.2.3
Id Species Group