aboutsummaryrefslogtreecommitdiff
path: root/topGene_step4_get_pmids_for_all_top_genes.py
diff options
context:
space:
mode:
authorHao Chen2019-05-19 12:17:50 -0500
committerHao Chen2019-05-19 12:17:50 -0500
commitd40664196495a76ce344d94bdf283b512004ce67 (patch)
tree90005288a0968477bd5a6380f234423bc94d96df /topGene_step4_get_pmids_for_all_top_genes.py
parent8ec2340063fef684268f11ea5aa631a9457459be (diff)
downloadgenecup-d40664196495a76ce344d94bdf283b512004ce67.tar.gz
search for top addiction genes
Diffstat (limited to 'topGene_step4_get_pmids_for_all_top_genes.py')
-rwxr-xr-xtopGene_step4_get_pmids_for_all_top_genes.py33
1 files changed, 33 insertions, 0 deletions
diff --git a/topGene_step4_get_pmids_for_all_top_genes.py b/topGene_step4_get_pmids_for_all_top_genes.py
new file mode 100755
index 0000000..9a18836
--- /dev/null
+++ b/topGene_step4_get_pmids_for_all_top_genes.py
@@ -0,0 +1,33 @@
+import os
+
+## save all pmids for the top genes so that I don't have to search for these.
+
def getPMID(query):
    """Search PubMed for *query* with the EDirect command-line tools.

    The query is passed to ``esearch -db pubmed`` and the search result is
    piped into ``efetch -format uid``.

    Args:
        query: PubMed query string; it is echoed to stdout before the search.

    Returns:
        Everything the pipeline wrote to standard output, as a single string
        (presumably one PMID per line -- depends on EDirect's uid format).
    """
    import shlex  # local import: only this function needs it

    print (query)
    # Quote the query for the shell so that quotes, "$", backticks or
    # backslashes inside a gene alias are passed to esearch literally instead
    # of being interpreted (or breaking the command line).
    command = (
        "esearch -db pubmed -query " + shlex.quote(query)
        + " | efetch -format uid"
    )
    # The context manager closes the pipe and reaps the child process.
    with os.popen(command) as pipe:
        return pipe.read()
+
def _build_tiab_query(alias_fields):
    """Turn the alias fields of one batch of genes into one PubMed query."""
    # NOTE(review): genes are joined with "|" but only ";" is expanded to
    # "[tiab] OR ", so the last alias of one gene and the first alias of the
    # next stay together as a single untagged term ("b|c"). Kept exactly as
    # before; confirm this is the query PubMed is meant to receive.
    joined = "|".join(alias_fields)
    return joined.replace(";", "[tiab] OR ") + "[tiab]"


def collectTerms(batch_size=5):
    """Query PubMed for every gene in ./topGene_symb_alias.txt.

    Each non-blank input line must hold exactly two tab-separated fields,
    ``symbol<TAB>aliases``; the alias field is expected to separate aliases
    with ";". Genes are searched ``batch_size`` at a time through getPMID()
    and the returned text is written to topGene_all.pmid, which is
    truncated first.

    Args:
        batch_size: number of genes combined into one PubMed query.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1, or a non-blank line
            does not contain exactly one tab.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    def search_batch(alias_fields, out_file):
        # One PubMed round trip for the whole batch.
        print ("\n")
        out_file.write(getPMID(_build_tiab_query(alias_fields)))

    # Both files are closed on exit, so the PMIDs are flushed to disk before
    # anything else reads topGene_all.pmid.
    with open("topGene_all.pmid", "w+") as pmids_f, \
            open("./topGene_symb_alias.txt", "r") as top:
        batch = []
        for one in top:
            if not one.strip():
                # Blank (e.g. trailing) lines carry no gene; skip them
                # instead of failing on the unpacking below.
                continue
            (_symb, alias) = one.split("\t")
            batch.append(alias.strip())
            if len(batch) == batch_size:
                search_batch(batch, pmids_f)
                batch = []
        if batch:
            # Genes left over when the gene count is not a multiple of
            # batch_size were previously never searched.
            search_batch(batch, pmids_f)
+
# Script entry point: fetch the PMIDs for all top genes, then reduce them to
# a sorted, de-duplicated list.
collectTerms()
# sort + uniq drops PMIDs that were returned for more than one batch.
os.system("sort topGene_all.pmid |uniq > topGene_uniq.pmid" )
# The unsorted intermediate file is no longer needed.
os.system("rm topGene_all.pmid")
# The message now names the file that is actually written above.
print ("results are in topGene_uniq.pmid")
+