"""Save the PubMed IDs (PMIDs) for all top genes so they need not be searched again.

Reads tab-separated ``symbol<TAB>aliases`` records from
``./topGene_symb_alias.txt``, queries PubMed through the ``esearch`` and
``efetch`` command-line tools in batches of five genes, and leaves the sorted,
de-duplicated output in ``topGene_uniq.pmid``.

Source: utility/topGene_step4_get_pmids_for_all_top_genes.py
(commit 15dde5133ac6d72846aa0db631e6660d50cb904e, "mv some files to utility",
Hao Chen, 2020-04-19).
"""
import os
import shlex


def getPMID(query):
    """Run *query* against PubMed and return the pipeline's standard output.

    The query is echoed to stdout, then passed to
    ``esearch -db pubmed -query <query> | efetch -format uid``.

    Args:
        query: The PubMed search expression.

    Returns:
        Whatever the shell pipeline wrote to its standard output, unmodified.
    """
    print(query)
    # shlex.quote keeps quotes, `$`, backticks, etc. inside gene aliases from
    # being interpreted by the shell (the original wrapped the query in bare
    # double quotes, which any such character would break).
    command = (
        "esearch -db pubmed -query " + shlex.quote(query) + " | efetch -format uid"
    )
    with os.popen(command) as pipe:
        return pipe.read()


def formatQuery(terms):
    """Turn a flat list of symbols/alias fields into one query string.

    Terms are joined with ``|``; every ``;`` becomes ``[tiab] OR `` and a final
    ``[tiab]`` is appended.
    """
    # NOTE(review): the "|" separators are never rewritten into "[tiab] OR ",
    # only ";" is. This reproduces the original query text exactly; confirm
    # against the alias file format whether "|" should be treated the same way.
    return "|".join(terms).replace(";", "[tiab] OR ") + "[tiab]"


def buildQueries(lines, batch_size=5):
    """Yield one query string per batch of *batch_size* gene records.

    Args:
        lines: Iterable of ``symbol<TAB>aliases`` text lines. Blank lines are
            skipped.
        batch_size: Number of gene records combined into one query.

    Yields:
        A query string for every full batch, plus one for the final partial
        batch (the original script silently dropped that remainder).

    Raises:
        ValueError: If *batch_size* is less than 1, or a non-blank line does
            not contain exactly one tab.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    terms = []
    for line in lines:
        if not line.strip():
            # A trailing empty line is common and carries no gene record.
            continue
        symb, alias = line.split("\t")
        terms.append(symb)
        terms.append(alias.strip())
        if len(terms) == 2 * batch_size:
            yield formatQuery(terms)
            terms = []
    if terms:
        # Flush the last, shorter batch so no gene is left unsearched.
        yield formatQuery(terms)


def collectTerms(
    symb_alias_path="./topGene_symb_alias.txt",
    out_path="topGene_all.pmid",
    batch_size=5,
):
    """Query PubMed for every gene record and write the raw results to a file.

    Args:
        symb_alias_path: Tab-separated input file of gene symbols and aliases.
        out_path: File that receives the concatenated ``getPMID`` outputs.
        batch_size: Number of gene records per PubMed query.
    """
    # Both files are closed (and the output flushed) before this returns, so
    # the de-duplication step that follows sees the complete data.
    with open(out_path, "w") as pmids_f, open(symb_alias_path, "r") as top:
        for query in buildQueries(top, batch_size):
            print("\n")
            pmids_f.write(getPMID(query))


def dedupePMIDs(all_path="topGene_all.pmid", uniq_path="topGene_uniq.pmid"):
    """Write the sorted, unique lines of *all_path* to *uniq_path*.

    Equivalent to ``sort all_path | uniq > uniq_path`` followed by removing
    *all_path*, without depending on external shell utilities.
    """
    with open(all_path, "r") as src:
        unique = sorted(set(src.read().splitlines()))
    with open(uniq_path, "w") as dst:
        dst.writelines(pmid + "\n" for pmid in unique)
    os.remove(all_path)


def main():
    """Collect PMIDs for all top genes and leave the unique set on disk."""
    collectTerms()
    dedupePMIDs()
    print("results are in topGene_uniq.pmid")


if __name__ == "__main__":
    main()