aboutsummaryrefslogtreecommitdiff
path: root/utility/topGene_step4_get_pmids_for_all_top_genes.py
diff options
context:
space:
mode:
Diffstat (limited to 'utility/topGene_step4_get_pmids_for_all_top_genes.py')
-rwxr-xr-xutility/topGene_step4_get_pmids_for_all_top_genes.py33
1 files changed, 33 insertions, 0 deletions
diff --git a/utility/topGene_step4_get_pmids_for_all_top_genes.py b/utility/topGene_step4_get_pmids_for_all_top_genes.py
new file mode 100755
index 0000000..adf527c
--- /dev/null
+++ b/utility/topGene_step4_get_pmids_for_all_top_genes.py
@@ -0,0 +1,33 @@
+import os
+
+## save all pmids for the top genes so that I don't have to search for these.
+
def getPMID(query):
    """Return the PubMed IDs that match *query*.

    The query is handed to the ``esearch`` command line tool
    (``-db pubmed``) and the result is piped into ``efetch -format uid``.
    Whatever that pipeline writes to standard output is returned unchanged.

    Args:
        query: PubMed search expression, e.g. ``"BDNF[tiab] OR ABC[tiab]"``.

    Returns:
        The raw standard output of the pipeline as a string; empty when the
        pipeline prints nothing.
    """
    # Local import keeps this function self-contained.
    import shlex

    print(query)
    # The query comes from a data file and may contain quotes, "$" or
    # backticks.  shlex.quote() makes the shell treat it as one literal
    # argument instead of interpreting those characters.
    command = (
        "esearch -db pubmed -query " + shlex.quote(query)
        + " | efetch -format uid"
    )
    # The context manager closes the pipe (and reaps the child) right away.
    with os.popen(command) as pipe:
        return pipe.read()
+
def collectTerms(symb_alias_path="./topGene_symb_alias.txt",
                 pmid_path="topGene_all.pmid", batch_size=5):
    """Search PubMed for every gene in the symbol/alias file, in batches.

    Each non-blank input line must hold exactly two tab-separated fields,
    a gene symbol and its alias string.  The fields of ``batch_size``
    consecutive lines are joined with ``|``, every ``;`` in the joined text
    is rewritten to ``[tiab] OR ``, ``[tiab]`` is appended, and the result
    is sent to ``getPMID``.  The text returned by each search is appended
    to ``pmid_path``.

    Args:
        symb_alias_path: Tab-separated input file (symbol, aliases).
        pmid_path: Output file; truncated first, then filled with the
            results of every search.
        batch_size: Number of input lines combined into one query.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or a non-blank
            line does not contain exactly one tab.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    def search_batch(terms, out):
        # Build one query from the collected fields and store its hits.
        print("\n")
        q = "|".join(terms)
        # NOTE(review): only ";" is turned into an OR; the "|" separators
        # are passed through as-is -- confirm this is the intended query
        # syntax.
        q = q.replace(";", "[tiab] OR ") + "[tiab]"
        out.write(getPMID(q))

    # Both files are closed (and the output flushed) when the block exits,
    # so later readers of pmid_path see complete data.
    with open(pmid_path, "w") as pmids_f, open(symb_alias_path, "r") as top:
        terms = []
        cnt = 0
        for one in top:
            if not one.strip():
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            (symb, alias) = one.split("\t")
            terms += [symb, alias.strip()]
            cnt += 1
            if cnt == batch_size:
                search_batch(terms, pmids_f)
                terms = []
                cnt = 0
        if terms:
            # Final partial batch: without this, genes left over when the
            # line count is not a multiple of batch_size are never searched.
            search_batch(terms, pmids_f)
    # Kept for output compatibility; the leftover is now always empty.
    print("there should be nothing following the word empty")
+
# Script body: collect the raw search results, then reduce them to a sorted
# list of unique IDs.
collectTerms()

# Deduplicate and sort in Python rather than via "sort | uniq" in a shell,
# so any failure raises before the raw file is deleted.  Lines are sorted as
# text (e.g. "10" before "9"), matching plain `sort` on digit-only lines.
with open("topGene_all.pmid", "r") as all_pmids:
    unique_pmids = sorted({line.rstrip("\n") for line in all_pmids})
with open("topGene_uniq.pmid", "w") as uniq_pmids:
    uniq_pmids.writelines(pmid + "\n" for pmid in unique_pmids)

# The raw file is only an intermediate; remove it once the unique list exists.
os.remove("topGene_all.pmid")
print("results are in topGene_uniq.pmid")
+