about summary refs log tree commit diff
path: root/utility/topGene_step4_get_pmids_for_all_top_genes.py
diff options
context:
space:
mode:
Diffstat (limited to 'utility/topGene_step4_get_pmids_for_all_top_genes.py')
-rwxr-xr-x utility/topGene_step4_get_pmids_for_all_top_genes.py 33
1 files changed, 33 insertions, 0 deletions
diff --git a/utility/topGene_step4_get_pmids_for_all_top_genes.py b/utility/topGene_step4_get_pmids_for_all_top_genes.py
new file mode 100755
index 0000000..adf527c
--- /dev/null
+++ b/utility/topGene_step4_get_pmids_for_all_top_genes.py
@@ -0,0 +1,33 @@
+import os
+
+## Save the PMIDs for all top genes up front so they do not have to be searched for again.
+
+def getPMID(query):
+    print (query)
+    pmids=os.popen("esearch -db pubmed -query \"" +  query + "\" | efetch -format uid").read()
+    return(pmids)
+
+def collectTerms():
+    pmids_f=open("topGene_all.pmid","w+")
+    with open("./topGene_symb_alias.txt", "r") as top:
+        q=str()
+        cnt=0
+        for one in top:
+            cnt+=1
+            (symb, alias)=one.split("\t")
+            q+="|"+symb+"|"+alias.strip()
+            if (cnt==5):
+                print ("\n")
+                q=q[1:]
+                q=q.replace(";", "[tiab] OR ")+"[tiab]"
+                pmids=getPMID(q)
+                pmids_f.write(pmids)
+                cnt=0
+                q=str()
+        print("there should be nothing following the word empty"+q)
+
+collectTerms()
+os.system("sort topGene_all.pmid |uniq > topGene_uniq.pmid" )
+os.system("rm topGene_all.pmid")
+print ("results are in topGen_uniq.pmid")
+