diff options
author | Zachary Sloan | 2013-08-14 17:25:52 -0500 |
---|---|---|
committer | Zachary Sloan | 2013-08-14 17:25:52 -0500 |
commit | 26b1883a8fe4053b59833178f44e047157f2fc9c (patch) | |
tree | d2359e5952f6062d75ba1f43f1804a7b6df284c9 /wqflask/maintenance | |
parent | eea1c38a9851f31011787b27c14365211a06ea51 (diff) | |
parent | 6379959af53b2ec595b85ccdc099c6f14adf0381 (diff) | |
download | genenetwork2-26b1883a8fe4053b59833178f44e047157f2fc9c.tar.gz |
Merge branch 'master' of https://github.com/zsloan/genenetwork
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/maintenance/browser_test.py | 60 | ||||
-rw-r--r-- | wqflask/maintenance/correlation_matrix_test.py | 117 | ||||
-rw-r--r-- | wqflask/maintenance/correlation_test.py | 110 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 94 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 43 | ||||
-rw-r--r-- | wqflask/maintenance/marker_regression_test.py | 118 | ||||
l--------- | wqflask/maintenance/our_settings.py | 1 | ||||
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 62 |
9 files changed, 556 insertions, 49 deletions
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/wqflask/maintenance/__init__.py diff --git a/wqflask/maintenance/browser_test.py b/wqflask/maintenance/browser_test.py new file mode 100644 index 00000000..ecf8e1ca --- /dev/null +++ b/wqflask/maintenance/browser_test.py @@ -0,0 +1,60 @@ +from __future__ import print_function, division, absolute_import + +from time import sleep + +import selenium +from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException +from selenium.webdriver.common.keys import Keys + +class Test(object): + def __init__(self): + self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') + + def get(self, url): + self.browser.get(url) + sleep(5) + self.title() + + def click(self, xpath_selector): + el = self.browser.find_element_by_xpath(xpath_selector) + text = el.text.strip() or el.get_attribute("value").strip() + el.click() + print("clicked:", text) + sleep(2) + + def click_option(self, xpath_selector, option_text): + el = self.browser.find_element_by_xpath(xpath_selector) + for option in el.find_elements_by_tag_name('option'): + if option.text == option_text: + option.click() # select() in earlier versions of webdriver + break + sleep(2) + + def enter_text(self, xpath_selector, text): + el = self.browser.find_element_by_xpath(xpath_selector) + sleep(10) + el.send_keys(text) + sleep(5) + # Just in case things get mangled by JavaScript, etc. we print the text for testing + self.get_text(xpath_selector) + + def get_text(self, xpath_selector): + el = self.browser.find_element_by_xpath(xpath_selector) + text = el.text.strip() or el.get_attribute("value").strip() + print("text:", text) + + def switch_window(self): + self.browser.switch_to_window(self.browser.window_handles[-1]) + sleep(2) + self.title() + sleep(2) + + + def title(self): + print("title:", self.browser.title) + + +#if __name__ == '__main__': +# import doctest +# doctest.testmod()
\ No newline at end of file diff --git a/wqflask/maintenance/correlation_matrix_test.py b/wqflask/maintenance/correlation_matrix_test.py new file mode 100644 index 00000000..2983a76b --- /dev/null +++ b/wqflask/maintenance/correlation_matrix_test.py @@ -0,0 +1,117 @@ +""" + +Test Correlation matrix + +>>> test = Test() +>>> test.get("http://genenetwork.org/") +title: GeneNetwork + +Choose the type +>>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') + +Enter the Get Any +>>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') +text: grin2b + +Search +>>> test.click('//*[@id="btsearch"]') + +Select the first 4 records +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td/input''') +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td/input''') +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[4]/td/input''') +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[5]/td/input''') + +>>> sleep(5) + +Add to collection page +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[1]/td/table/tbody/tr[1]/td[4]/a''') + +>>> sleep(5) + +A new window is created, so we switch to it +>>> test.switch_window() +title: BXD Trait Collection + +Select all records +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/a/img''') + +Click Matrix +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr/td/table/tbody/tr[2]/td/table/tbody/tr/td[2]/a/img''') + +Another new window +>>> test.switch_window() +title: Correlation Matrix + +Sleep a bunch because this can take a while +>>> sleep(10) + +Ensure that the correlation between Trait3 (HC_M2_0606_P::1457003_at) and Trait4 (HC_M2_0606_P::1422223_at) is 0.608 +>>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/blockquote/table/tbody/tr[5]/td[5]/a/font''') +text: 0.608 + +""" + +from __future__ import print_function, division, absolute_import + +from time import sleep + +import selenium +from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException +from selenium.webdriver.common.keys import Keys + +from browser_test import Test +# +#class Test(object): +# def __init__(self): +# self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') +# +# def get(self, url): +# self.browser.get(url) +# sleep(5) +# self.title() +# +# def click(self, xpath_selector): +# el = self.browser.find_element_by_xpath(xpath_selector) +# #text = el.text.strip() or el.get_attribute("value").strip() +# el.click() +# #print("clicked:", text) +# sleep(2) +# +# def click_option(self, xpath_selector, option_text): +# el = self.browser.find_element_by_xpath(xpath_selector) +# for option in el.find_elements_by_tag_name('option'): +# if option.text == option_text: +# option.click() # select() in earlier versions of webdriver +# break +# sleep(2) +# +# def enter_text(self, xpath_selector, text): +# el = self.browser.find_element_by_xpath(xpath_selector) +# sleep(10) +# el.send_keys(text) +# sleep(5) +# # Just in case things get mangled by JavaScript, etc. we print the text for testing +# self.get_text(xpath_selector) +# +# def get_text(self, xpath_selector): +# el = self.browser.find_element_by_xpath(xpath_selector) +# text = el.text.strip() or el.get_attribute("value").strip() +# print("text:", text) +# +# def switch_window(self): +# self.browser.switch_to_window(self.browser.window_handles[-1]) +# sleep(2) +# self.title() +# sleep(2) +# +# +# def title(self): +# print("title:", self.browser.title) +# +# +# +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/wqflask/maintenance/correlation_test.py b/wqflask/maintenance/correlation_test.py new file mode 100644 index 00000000..aaf3f7df --- /dev/null +++ b/wqflask/maintenance/correlation_test.py @@ -0,0 +1,110 @@ +""" +Test calculate correlations + +>>> test = Test() +>>> test.get("http://genenetwork.org") +title: GeneNetwork + +Choose the type +>>> test.click_option('''//*[@id="tissue"]''', 'Hippocampus mRNA') + +Enter the Get Any +>>> test.enter_text('''//*[@id="tfor"]''', 'ssh') +text: ssh + +Search +>>> test.click('//*[@id="btsearch"]') +clicked: Search + +Choose the first result +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[2]/td[2]/a''') +clicked: 1455854_a_at + +A new window is created, so we switch to it +>>> test.switch_window() +title: Hippocampus M430v2 BXD 06/06 PDNN : 1455854_a_at: Display Trait + +Click on Calculate Correlations +>>> test.click('''//*[@id="title3"]''') +clicked: Calculate Correlations + +Click on Compute +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p[6]/table/tbody/tr/td/div/div/span/table/tbody/tr/td/input[3]''') +clicked: Compute + +Another new window +>>> test.switch_window() +title: Correlation + +Sleep a bunch because this can take a while +>>> sleep(60) + +Ensure the Sample rho is the exepcted 1.000 because it should be the same record +>>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/table/tbody/tr[2]/td/div/table/tbody/tr[2]/td[9]/a''') +text: 1.000 + +""" + +from __future__ import print_function, division, absolute_import + +from time import sleep + + +import selenium +from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException +from selenium.webdriver.common.keys import Keys + + +class Test(object): + def __init__(self): + self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') + + def get(self, url): + self.browser.get(url) + sleep(5) + self.title() + + def click(self, xpath_selector): + el = self.browser.find_element_by_xpath(xpath_selector) + text = el.text.strip() or el.get_attribute("value").strip() + el.click() + print("clicked:", text) + sleep(2) + + def click_option(self, xpath_selector, option_text): + el = self.browser.find_element_by_xpath(xpath_selector) + for option in el.find_elements_by_tag_name('option'): + if option.text == option_text: + option.click() # select() in earlier versions of webdriver + break + sleep(2) + + def enter_text(self, xpath_selector, text): + el = self.browser.find_element_by_xpath(xpath_selector) + sleep(10) + el.send_keys(text) + sleep(5) + # Just in case things get mangled by JavaScript, etc. we print the text for testing + self.get_text(xpath_selector) + + def get_text(self, xpath_selector): + el = self.browser.find_element_by_xpath(xpath_selector) + text = el.text.strip() or el.get_attribute("value").strip() + print("text:", text) + + def switch_window(self): + self.browser.switch_to_window(self.browser.window_handles[-1]) + sleep(2) + self.title() + sleep(2) + + + def title(self): + print("title:", self.browser.title) + + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 4c544192..ad560659 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -29,25 +29,47 @@ It needs to be run manually when database has been changed. from __future__ import print_function, division -import sys +#from flask import config +# +#cdict = {} +#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS') +#print("cdict is:", cdict) -sys.path.insert(0, "..") +import our_settings import MySQLdb import simplejson as json +import urlparse -from pprint import pformat as pf -from base import webqtlConfig +#import sqlalchemy as sa + +from pprint import pformat as pf +#Engine = sa.create_engine(our_settings.SQLALCHEMY_DATABASE_URI) # build MySql database connection -Con = MySQLdb.Connect(db=webqtlConfig.DB_NAME, - host=webqtlConfig.MYSQL_SERVER, - user=webqtlConfig.DB_USER, - passwd=webqtlConfig.DB_PASSWD) -Cursor = Con.cursor() + +#conn = Engine.connect() + + + + + +def parse_db_uri(db_uri): + """Converts a database URI to the db name, host name, user name, and password""" + + parsed_uri = urlparse.urlparse(our_settings.DB_URI) + + db_conn_info = dict( + db = parsed_uri.path[1:], + host = parsed_uri.hostname, + user = parsed_uri.username, + passwd = parsed_uri.password) + + return db_conn_info + def get_species(): @@ -77,12 +99,14 @@ def get_groups(species): def get_types(groups): """Build types list""" types = {} + print("Groups: ", pf(groups)) for species, group_dict in groups.iteritems(): types[species] = {} for group_name, _group_full_name in group_dict: # make group an alias to shorten the code types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")] types[species][group_name] += build_types(species, group_name) + return types @@ -93,17 +117,32 @@ def build_types(species, group): (all types except phenotype/genotype are tissues) """ - Cursor.execute("""select distinct Tissue.Name, concat(Tissue.Name, ' mRNA') + + print("""select distinct Tissue.Name + from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species + where Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '{}' and + ProbeFreeze.TissueId = Tissue.Id and + ProbeFreeze.InbredSetId = InbredSet.Id and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and + ProbeSetFreeze.public > 0 + order by Tissue.Name""".format(species, group)) + Cursor.execute("""select distinct Tissue.Name from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species where Species.Name = %s and Species.Id = InbredSet.SpeciesId and InbredSet.Name = %s and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.public > %s - order by Tissue.Name""", (species, group, webqtlConfig.PUBLICTHRESH)) - return Cursor.fetchall() - + ProbeSetFreeze.public > 0 + order by Tissue.Name""", (species, group)) + + results = [] + for result in Cursor.fetchall(): + if len(result): + results.append((result[0], result[0])) + + return results def get_datasets(types): """Build datasets list""" @@ -111,9 +150,10 @@ def get_datasets(types): for species, group_dict in types.iteritems(): datasets[species] = {} for group, type_list in group_dict.iteritems(): + print("type_list: ", type_list) datasets[species][group] = {} - for type_name, _type_full_name in type_list: - datasets[species][group][type_name] = build_datasets(species, group, type_name) + for type_name in type_list: + datasets[species][group][type_name[0]] = build_datasets(species, group, type_name[0]) return datasets @@ -134,20 +174,30 @@ def build_datasets(species, group, type_name): if dataset_value: return [(dataset_value, dataset_text)] else: + print("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from + ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where + Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '{}' and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{}' + and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = + InbredSet.Id and ProbeSetFreeze.public > 0 order by + ProbeSetFreeze.CreateTime desc""".format(species, group, type_name)) Cursor.execute("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = %s and Species.Id = InbredSet.SpeciesId and InbredSet.Name = %s and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = %s and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = - InbredSet.Id and ProbeSetFreeze.public > %s order by - ProbeSetFreeze.CreateTime desc""", ( - species, group, type_name, webqtlConfig.PUBLICTHRESH)) + InbredSet.Id and ProbeSetFreeze.public > 0 order by + ProbeSetFreeze.CreateTime desc""", (species, group, type_name)) return Cursor.fetchall() def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" + + parse_db_uri(our_settings.DB_URI) + species = get_species() groups = get_groups(species) types = get_types(groups) @@ -167,6 +217,8 @@ def main(): datasets=datasets, ) + print("data:", data) + output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json""" with open(output_file, 'w') as fh: @@ -182,5 +234,7 @@ def _test_it(): datasets = build_datasets("Mouse", "BXD", "Hippocampus") print("build_datasets:", pf(datasets)) -if __name__ == '__main__': +if __name__ == '__main__': + Conn = MySQLdb.Connect(**parse_db_uri(our_settings.DB_URI)) + Cursor = Conn.cursor() main() diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py new file mode 100644 index 00000000..c9ec3872 --- /dev/null +++ b/wqflask/maintenance/get_group_samplelists.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import, print_function, division + +import os +import glob +import gzip + +from base import webqtlConfig + + +def process_genofiles(geno_dir=webqtlConfig.GENODIR): + print("Yabba") + #sys.exit("Dabba") + os.chdir(geno_dir) + for geno_file in glob.glob("*"): + if geno_file.lower().endswith(('.geno', '.geno.gz')): + #group_name = genofilename.split('.')[0] + sample_list = get_samplelist(geno_file) + + +def get_samplelist(geno_file): + genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) + if os.path.isfile(genofilename + '.gz'): + genofilename += '.gz' + genofile = gzip.open(genofilename) + else: + genofile = open(genofilename) + + for line in genofile: + line = line.strip() + if not line: + continue + if line.startswith(("#", "@")): + continue + break + + headers = line.split() + + if headers[3] == "Mb": + samplelist = headers[4:] + else: + samplelist = headers[3:] + return samplelist + diff --git a/wqflask/maintenance/marker_regression_test.py b/wqflask/maintenance/marker_regression_test.py new file mode 100644 index 00000000..f4840bd7 --- /dev/null +++ b/wqflask/maintenance/marker_regression_test.py @@ -0,0 +1,118 @@ +""" +Test calculate correlations + +>>> test = Test() +>>> test.get("http://genenetwork.org") +title: GeneNetwork + +Choose the type +>>> test.click_option('''//*[@id="tissue"]''', 'Liver mRNA') +clicked: Liver mRNA + +Enter the Get Any +>>> test.enter_text('''//*[@id="tfor"]''', 'grin2b') +text: grin2b + +Search +>>> test.click('//*[@id="btsearch"]') +clicked: Search + +Choose the second result +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p/table/tbody/tr[3]/td/div/table/tbody/tr[3]/td[2]/a''') +clicked: 1431700_at_A + +A new window is created, so we switch to it +>>> test.switch_window() +title: GSE16780 UCLA Hybrid MDP Liver Affy HT M430A (Sep11) RMA : 1431700_at_A: Display Trait + +Click on Mapping Tools +>>> test.click('''//*[@id="title4"]''') +clicked: Mapping Tools + +Click on Marker Regression tab +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p[8]/table/tbody/tr/td/div/ul/li[2]/a''') +clicked: Marker Regression + +Click on Compute +>>> test.click('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/p[8]/table/tbody/tr/td/div/div[2]/span/table/tbody/tr/td/input''') +clicked: Compute + +Another new window +>>> test.switch_window() +title: Genome Association Result + +Sleep a bunch because this can take a while +>>> sleep(60) + +Ensure that the LRS of the top record is the exepcted value +>>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr[4]/td/table/tbody/tr/td/div/table/tbody/tr[2]/td[2]''') +text: 11.511 + +""" + +from __future__ import print_function, division, absolute_import + +from browser_test import Test + +# +#from time import sleep +# +# +#import selenium +#from selenium import webdriver +#from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException +#from selenium.webdriver.common.keys import Keys +# +# +#class Test(object): +# def __init__(self): +# self.browser = webdriver.Chrome('/home/gn2/gn2/webtests/chromedriver') +# +# def get(self, url): +# self.browser.get(url) +# sleep(5) +# self.title() +# +# def click(self, xpath_selector): +# el = self.browser.find_element_by_xpath(xpath_selector) +# text = el.text.strip() or el.get_attribute("value").strip() +# el.click() +# print("clicked:", text) +# sleep(2) +# +# def click_option(self, xpath_selector, option_text): +# el = self.browser.find_element_by_xpath(xpath_selector) +# for option in el.find_elements_by_tag_name('option'): +# if option.text == option_text: +# option.click() # select() in earlier versions of webdriver +# break +# sleep(2) +# +# def enter_text(self, xpath_selector, text): +# el = self.browser.find_element_by_xpath(xpath_selector) +# sleep(10) +# el.send_keys(text) +# sleep(5) +# # Just in case things get mangled by JavaScript, etc. we print the text for testing +# self.get_text(xpath_selector) +# +# def get_text(self, xpath_selector): +# el = self.browser.find_element_by_xpath(xpath_selector) +# text = el.text.strip() or el.get_attribute("value").strip() +# print("text:", text) +# +# def switch_window(self): +# self.browser.switch_to_window(self.browser.window_handles[-1]) +# sleep(2) +# self.title() +# sleep(2) +# +# +# def title(self): +# print("title:", self.browser.title) + + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/wqflask/maintenance/our_settings.py b/wqflask/maintenance/our_settings.py new file mode 120000 index 00000000..14efe407 --- /dev/null +++ b/wqflask/maintenance/our_settings.py @@ -0,0 +1 @@ +../../../zach_settings.py
\ No newline at end of file diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index b07e7656..9cd792ef 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -71,9 +71,10 @@ class PublishXRef(Base): values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId]) values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId) print("terms is:", values['terms']) - values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) - ins = QuickSearch.insert().values(**values) - conn.execute(ins) + if values['terms']: + values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) counter += 1 print("Done:", counter) @@ -100,28 +101,30 @@ class PublishXRef(Base): inbredset_id=inbredset_id).all() unique = set() - for item in results[0]: - #print("locals:", locals()) - if not item: - continue - for token in item.split(): - if token.startswith(('(','[')): - token = token[1:] - if token.endswith((')', ']')): - token = token[:-1] - if token.endswith(';'): - token = token[:-1] - if len(token) > 2: - try: - # This hopefully ensures that the token is utf-8 - token = token.encode('utf-8') - print(" ->", token) - except UnicodeDecodeError: - print("\n-- UDE \n") - # Can't get it into utf-8, we won't use it - continue - - unique.add(token) + print("results: ", results) + if len(results): + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @@ -467,8 +470,8 @@ QuickSearch = sa.Table("QuickSearch", Metadata, mysql_engine = 'MyISAM', ) -#QuickSearch.drop(Engine, checkfirst=True) -#Metadata.create_all(Engine) +QuickSearch.drop(Engine, checkfirst=True) +Metadata.create_all(Engine) def row2dict(row): @@ -495,9 +498,10 @@ def main(): Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. """ + + GenoXRef.run() + PublishXRef.run() ProbeSetXRef.run() - #GenoXRef.run() - #PublishXRef.run() if __name__ == "__main__": main()
\ No newline at end of file |