diff options
author | Zachary Sloan | 2013-07-23 17:12:29 -0500 |
---|---|---|
committer | Zachary Sloan | 2013-07-23 17:12:29 -0500 |
commit | 930d8b0cf0c7bf88fee1de95852dfe883418a494 (patch) | |
tree | 04d542a01dba0c4529f55912ab242a3bc61459aa /wqflask/maintenance | |
parent | 82f493650909e2351035e26e9dc82b16498beb48 (diff) | |
download | genenetwork2-930d8b0cf0c7bf88fee1de95852dfe883418a494.tar.gz |
Fixed a couple of issues with the quick_search_table.py script
Added notes on using percona to optimise mysql to gn_installation_notes.txt
Started moving "get_trait_data" to DataSet since it is the same regardless
of the particular DataSet subclass (PhenotypeDataSet, MrnaAssayDataSet, etc)
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 62 |
1 files changed, 33 insertions, 29 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index b07e7656..9cd792ef 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -71,9 +71,10 @@ class PublishXRef(Base): values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId]) values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId) print("terms is:", values['terms']) - values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) - ins = QuickSearch.insert().values(**values) - conn.execute(ins) + if values['terms']: + values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) counter += 1 print("Done:", counter) @@ -100,28 +101,30 @@ class PublishXRef(Base): inbredset_id=inbredset_id).all() unique = set() - for item in results[0]: - #print("locals:", locals()) - if not item: - continue - for token in item.split(): - if token.startswith(('(','[')): - token = token[1:] - if token.endswith((')', ']')): - token = token[:-1] - if token.endswith(';'): - token = token[:-1] - if len(token) > 2: - try: - # This hopefully ensures that the token is utf-8 - token = token.encode('utf-8') - print(" ->", token) - except UnicodeDecodeError: - print("\n-- UDE \n") - # Can't get it into utf-8, we won't use it - continue - - unique.add(token) + print("results: ", results) + if len(results): + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + 
unique.add(token) #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @@ -467,8 +470,8 @@ QuickSearch = sa.Table("QuickSearch", Metadata, mysql_engine = 'MyISAM', ) -#QuickSearch.drop(Engine, checkfirst=True) -#Metadata.create_all(Engine) +QuickSearch.drop(Engine, checkfirst=True) +Metadata.create_all(Engine) def row2dict(row): @@ -495,9 +498,10 @@ def main(): Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. """ + + GenoXRef.run() + PublishXRef.run() ProbeSetXRef.run() - #GenoXRef.run() - #PublishXRef.run() if __name__ == "__main__": main()
\ No newline at end of file |