about summary refs log tree commit diff
path: root/topGene_step3_generate_html.py
diff options
context:
space:
mode:
author Hao Chen 2019-05-19 12:17:50 -0500
committer Hao Chen 2019-05-19 12:17:50 -0500
commit d40664196495a76ce344d94bdf283b512004ce67 (patch)
tree 90005288a0968477bd5a6380f234423bc94d96df /topGene_step3_generate_html.py
parent 8ec2340063fef684268f11ea5aa631a9457459be (diff)
download genecup-d40664196495a76ce344d94bdf283b512004ce67.tar.gz
search for top addiction genes
Diffstat (limited to 'topGene_step3_generate_html.py')
-rwxr-xr-x topGene_step3_generate_html.py 15
1 files changed, 11 insertions, 4 deletions
diff --git a/topGene_step3_generate_html.py b/topGene_step3_generate_html.py
index dfcd6fe..6755adb 100755
--- a/topGene_step3_generate_html.py
+++ b/topGene_step3_generate_html.py
@@ -1,14 +1,21 @@
 import re
+import sys
 
 ## generate the html page for the top genes
 
 ## put gene names and alias in a dictionary
+#ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted_absCnt_absCnt_sorted.txt
+if (len(sys.argv) != 2):
+    print ("please provide the name of a sorted gene abstract count file")
+    sys.exit()
+
 geneNames={}
-with open ("./ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted.txt","r") as f:
+with open (sys.argv[1],"r") as f:
     for line in f:
         (genes, count)=line.strip().split("\t")
         gene=genes.split("|")
-        geneNames[gene[0]]=genes.strip()
+        names=re.sub(r'^.*?\|', "", genes)
+        geneNames[gene[0]]=names.strip().replace("|", "; ")
 
 out=str()
 html=str()
@@ -20,8 +27,8 @@ with open("./topGeneAbstractCount.tab" ,"r") as gc:
         print (line)
         pmid_cnt, symb=line.strip().split()
         out+= symb+"\t"+geneNames[symb]+"\n"
-        html+="<li><a href=\"/showTopGene?topGene="+symb+"\">"+symb+"</a><br>\n"
-        if cnt==500:
+        html+="<li><a href=\"/showTopGene?topGene="+symb+"\">"+symb+"</a> <span style=\"font-size:small; color:grey\">("+geneNames[symb]+")</span><br>\n"
+        if cnt==200:
             break
 
 with open("topGene_symb_alias.txt", "w+")  as tg: